Diffstat (limited to 'drivers/gpu/drm')
459 files changed, 15013 insertions, 7491 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a52795d9b458..c04f44a90392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -35,41 +35,50 @@ #include "acp_gfx_if.h" -#define ACP_TILE_ON_MASK 0x03 -#define ACP_TILE_OFF_MASK 0x02 -#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f -#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 - -#define ACP_TILE_P1_MASK 0x3e -#define ACP_TILE_P2_MASK 0x3d -#define ACP_TILE_DSP0_MASK 0x3b -#define ACP_TILE_DSP1_MASK 0x37 - -#define ACP_TILE_DSP2_MASK 0x2f - -#define ACP_DMA_REGS_END 0x146c0 -#define ACP_I2S_PLAY_REGS_START 0x14840 -#define ACP_I2S_PLAY_REGS_END 0x148b4 -#define ACP_I2S_CAP_REGS_START 0x148b8 -#define ACP_I2S_CAP_REGS_END 0x1496c - -#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac -#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8 -#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c -#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68 - -#define mmACP_PGFSM_RETAIN_REG 0x51c9 -#define mmACP_PGFSM_CONFIG_REG 0x51ca -#define mmACP_PGFSM_READ_REG_0 0x51cc - -#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8 -#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9 -#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa -#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb - -#define ACP_TIMEOUT_LOOP 0x000000FF -#define ACP_DEVS 3 -#define ACP_SRC_ID 162 +#define ACP_TILE_ON_MASK 0x03 +#define ACP_TILE_OFF_MASK 0x02 +#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f +#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 + +#define ACP_TILE_P1_MASK 0x3e +#define ACP_TILE_P2_MASK 0x3d +#define ACP_TILE_DSP0_MASK 0x3b +#define ACP_TILE_DSP1_MASK 0x37 + +#define ACP_TILE_DSP2_MASK 0x2f + +#define ACP_DMA_REGS_END 0x146c0 +#define ACP_I2S_PLAY_REGS_START 0x14840 +#define ACP_I2S_PLAY_REGS_END 0x148b4 +#define ACP_I2S_CAP_REGS_START 0x148b8 +#define ACP_I2S_CAP_REGS_END 0x1496c + +#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac +#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8 +#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c +#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68 + +#define mmACP_PGFSM_RETAIN_REG 0x51c9 +#define mmACP_PGFSM_CONFIG_REG 0x51ca +#define mmACP_PGFSM_READ_REG_0 0x51cc + +#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8 +#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9 +#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa +#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb + +#define mmACP_CONTROL 0x5131 +#define mmACP_STATUS 0x5133 +#define mmACP_SOFT_RESET 0x5134 +#define ACP_CONTROL__ClkEn_MASK 0x1 +#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100 +#define ACP_SOFT_RESET__SoftResetAudDone_MASK 0x1000000 +#define ACP_CLOCK_EN_TIME_OUT_VALUE 0x000000FF +#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF + +#define ACP_TIMEOUT_LOOP 0x000000FF +#define ACP_DEVS 3 +#define ACP_SRC_ID 162 enum { ACP_TILE_P1 = 0, @@ -260,6 +269,8 @@ static int acp_hw_init(void *handle) { int r, i; uint64_t acp_base; + u32 val = 0; + u32 count = 0; struct device *dev; struct i2s_platform_data *i2s_pdata; @@ -371,6 +382,8 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell[0].name = "acp_audio_dma"; adev->acp.acp_cell[0].num_resources = 4; adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); adev->acp.acp_cell[1].name = "designware-i2s"; adev->acp.acp_cell[1].num_resources = 1; @@ -400,6 +413,46 @@ static int acp_hw_init(void *handle) } } + /* Assert Soft reset of ACP */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + + val |= ACP_SOFT_RESET__SoftResetAud_MASK; + 
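The hunk above opens a sequence that acp_hw_init() and acp_hw_fini() repeat four times in this patch: write a control bit, then poll a status register with a bounded retry budget and a 100us delay per iteration. Below is a userspace mock of that poll idiom; the register file and the helper name reg_poll() are hypothetical stand-ins for cgs_read_register()/cgs_write_register().

#include <stdint.h>
#include <stdio.h>

/* mock register file standing in for the cgs accessors */
static uint32_t regs[0x6000];

static uint32_t reg_read(uint32_t reg)            { return regs[reg]; }
static void reg_write(uint32_t reg, uint32_t val) { regs[reg] = val; }

#define ACP_SOFT_RESET           0x5134
#define SOFT_RESET_AUD_MASK      0x100
#define SOFT_RESET_AUD_DONE_MASK 0x1000000
#define TIMEOUT_TRIES            0xff

/* bounded poll: wait until (reg & mask) == expect or the budget runs out */
static int reg_poll(uint32_t reg, uint32_t mask, uint32_t expect, uint32_t tries)
{
	while (tries--) {
		if ((reg_read(reg) & mask) == expect)
			return 0;
		/* the driver udelay(100)s here between reads */
	}
	return -1;	/* the driver returns -ETIMEDOUT */
}

int main(void)
{
	/* assert soft reset, then pretend the hardware completed it */
	reg_write(ACP_SOFT_RESET, reg_read(ACP_SOFT_RESET) | SOFT_RESET_AUD_MASK);
	regs[ACP_SOFT_RESET] |= SOFT_RESET_AUD_DONE_MASK;

	printf("reset done: %d\n",
	       reg_poll(ACP_SOFT_RESET, SOFT_RESET_AUD_DONE_MASK,
			SOFT_RESET_AUD_DONE_MASK, TIMEOUT_TRIES));
	return 0;
}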
cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + + count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE; + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + if (ACP_SOFT_RESET__SoftResetAudDone_MASK == + (val & ACP_SOFT_RESET__SoftResetAudDone_MASK)) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + /* Enable clock to ACP and wait until the clock is enabled */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL); + val = val | ACP_CONTROL__ClkEn_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val); + + count = ACP_CLOCK_EN_TIME_OUT_VALUE; + + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS); + if (val & (u32) 0x1) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + /* Deassert the SOFT RESET flags */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + return 0; } @@ -412,6 +465,8 @@ static int acp_hw_init(void *handle) static int acp_hw_fini(void *handle) { int i, ret; + u32 val = 0; + u32 count = 0; struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -419,6 +474,42 @@ static int acp_hw_fini(void *handle) if (!adev->acp.acp_cell) return 0; + /* Assert Soft reset of ACP */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + + val |= ACP_SOFT_RESET__SoftResetAud_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + + count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE; + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + if (ACP_SOFT_RESET__SoftResetAudDone_MASK == + (val & ACP_SOFT_RESET__SoftResetAudDone_MASK)) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + /* Disable ACP clock */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL); + val &= ~ACP_CONTROL__ClkEn_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val); + + count = ACP_CLOCK_EN_TIME_OUT_VALUE; + + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS); + if (val & (u32) 0x1) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index f66d33e4baca..f450b69323fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1766,34 +1766,32 @@ bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) return true; } -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. 
*/ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index f9ffe8ef0cd6..ff8efd0f8fd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -71,19 +71,33 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) struct atom_context *ctx = adev->mode_info.atom_context; int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_usagebyfirmware); + struct vram_usagebyfirmware_v2_1 * firmware_usage; + uint32_t start_addr, size; uint16_t data_offset; int usage_bytes = 0; if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { - struct vram_usagebyfirmware_v2_1 *firmware_usage = - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); - + firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", le32_to_cpu(firmware_usage->start_address_in_kb), le16_to_cpu(firmware_usage->used_by_firmware_in_kb), le16_to_cpu(firmware_usage->used_by_driver_in_kb)); - usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024; + start_addr = le32_to_cpu(firmware_usage->start_address_in_kb); + size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb); + + if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->fw_vram_usage.start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->fw_vram_usage.size = size << 10; + /* Use the default scratch size */ + usage_bytes = 0; + } else { + usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10; + } } ctx->scratch_size_bytes = 0; if (usage_bytes == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0b9332e65a4c..efcacb827de7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -546,7 +546,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); - *wb = offset * 8; /* convert to dw offset */ + *wb = offset << 3; /* convert to dw offset */ return 0; } else { return -EINVAL; @@ -564,7 +564,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) 
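The rewritten amdgpu_atombios_copy_swap() above drops the old partial-dword tail handling: it stages through a u32 buffer and always copies ALIGN(num_bytes, 4) bytes on the atom-facing side, which is why the new comment requires callers to size their buffers to the aligned length. A standalone sketch of the big-endian branch, with bswap32() standing in for the kernel's cpu_to_le32()/le32_to_cpu():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN4(x) (((x) + 3u) & ~3u)

/* stand-in for cpu_to_le32()/le32_to_cpu() on a big-endian host */
static uint32_t bswap32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0xff00) |
	       ((v << 8) & 0xff0000) | (v << 24);
}

static void copy_swap(uint8_t *dst, const uint8_t *src, uint8_t num_bytes,
		      int to_le)
{
	uint32_t src_tmp[5] = { 0 }, dst_tmp[5];	/* num_bytes <= 20 */
	uint8_t n = ALIGN4(num_bytes);
	int i;

	/* to_le: read num_bytes, write n; from atom: read n, write
	 * num_bytes. The side copied with length n is why callers must
	 * provide ALIGN(num_bytes, 4) bytes of buffer there.
	 */
	memcpy(src_tmp, src, to_le ? num_bytes : n);
	for (i = 0; i < n / 4; i++)
		dst_tmp[i] = bswap32(src_tmp[i]);
	memcpy(dst, dst_tmp, to_le ? n : num_bytes);
}

int main(void)
{
	uint8_t in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, out[8] = { 0 };
	int i;

	copy_swap(out, in, 6, 1);	/* dst sized for ALIGN(6, 4) == 8 */
	for (i = 0; i < 8; i++)
		printf("%02x ", out[i]);
	printf("\n");
	return 0;
}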
void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) { if (wb < adev->wb.num_wb) - __clear_bit(wb, adev->wb.used); + __clear_bit(wb >> 3, adev->wb.used); } /** @@ -744,27 +744,6 @@ bool amdgpu_need_post(struct amdgpu_device *adev) { uint32_t reg; - if (adev->has_hw_reset) { - adev->has_hw_reset = false; - return true; - } - - /* bios scratch used on CIK+ */ - if (adev->asic_type >= CHIP_BONAIRE) - return amdgpu_atombios_scratch_need_asic_init(adev); - - /* check MEM_SIZE for older asics */ - reg = amdgpu_asic_get_config_memsize(adev); - - if ((reg != 0) && (reg != 0xffffffff)) - return false; - - return true; - -} - -static bool amdgpu_vpost_needed(struct amdgpu_device *adev) -{ if (amdgpu_sriov_vf(adev)) return false; @@ -787,7 +766,23 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return true; } } - return amdgpu_need_post(adev); + + if (adev->has_hw_reset) { + adev->has_hw_reset = false; + return true; + } + + /* bios scratch used on CIK+ */ + if (adev->asic_type >= CHIP_BONAIRE) + return amdgpu_atombios_scratch_need_asic_init(adev); + + /* check MEM_SIZE for older asics */ + reg = amdgpu_asic_get_config_memsize(adev); + + if ((reg != 0) && (reg != 0xffffffff)) + return false; + + return true; } /** @@ -1951,6 +1946,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) static enum amd_ip_block_type ip_order[] = { AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_DCE, AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, @@ -2036,12 +2032,17 @@ static int amdgpu_resume(struct amdgpu_device *adev) static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { - if (adev->is_atom_fw) { - if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; - } else { - if (amdgpu_atombios_has_gpu_virtualization_table(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + if (amdgpu_sriov_vf(adev)) { + if (adev->is_atom_fw) { + if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } else { + if (amdgpu_atombios_has_gpu_virtualization_table(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } + + if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); } } @@ -2208,10 +2209,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_detect_sriov_bios(adev); /* Post card if necessary */ - if (amdgpu_vpost_needed(adev)) { + if (amdgpu_need_post(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); r = -EINVAL; goto failed; } @@ -2219,7 +2219,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) { dev_err(adev->dev, "gpu post error!\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); goto failed; } } else { @@ -3023,7 +3022,6 @@ out: } } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); @@ -3037,7 +3035,6 @@ out: if (r) { /* bad news, how to tell it to userspace ? 
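The amdgpu_wb_get()/amdgpu_wb_free() pair above is an encode/decode fix: _get hands out the slot index encoded as a dword offset (offset << 3, eight dwords per writeback slot), so _free must decode with wb >> 3 before clearing the bitmap bit. The old code cleared bit `wb` itself, i.e. the encoded value. A tiny standalone model of the round-trip invariant, with hypothetical names:

#include <stdio.h>

#define WB_SHIFT 3	/* one writeback slot == 8 dwords */

static unsigned int wb_encode(unsigned int slot) { return slot << WB_SHIFT; }
static unsigned int wb_decode(unsigned int wb)   { return wb >> WB_SHIFT; }

int main(void)
{
	unsigned int slot = 5;
	unsigned int wb = wb_encode(slot);	/* what _get hands out */

	/* _free must decode before touching the allocation bitmap;
	 * clearing bit `wb` directly would free the wrong slot
	 */
	printf("slot %u -> wb %u -> slot %u\n", slot, wb, wb_decode(wb));
	return wb_decode(wb) == slot ? 0 : 1;
}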
*/ dev_info(adev->dev, "GPU reset failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(adev->dev, "GPU reset successed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index f4370081f6e6..fe818501c520 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -332,12 +332,13 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, adev->gart.pages[p] = pagelist[i]; #endif - if (adev->gart.ptr) { - r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, - adev->gart.ptr); - if (r) - return r; - } + if (!adev->gart.ptr) + return 0; + + r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, + adev->gart.ptr); + if (r) + return r; mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8b4ed8a98a18..ea25164e7f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index e5ece1fae149..a98fbbb4739f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -136,7 +136,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); - amdgpu_ring_lru_touch(ring->adev, ring); + if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) + amdgpu_ring_lru_touch(ring->adev, ring); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 51eacefadea1..1f036af85ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -909,7 +909,8 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; - placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; + placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | + TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); if (unlikely(r)) @@ -1192,9 +1193,6 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; - if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) - return ttm_bo_eviction_valuable(bo, place); - switch (bo->mem.mem_type) { case TTM_PL_TT: return true; @@ -1209,7 +1207,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, num_pages -= node->size; ++node; } - break; + return false; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b46280c1279f..2918de2f39ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ 
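The amdgpu_ttm_bind() change above is a flag-hygiene fix: bo->mem.placement still carries the buffer's current memory-type flag, so OR-ing TTM_PL_FLAG_TT on top without first clearing TTM_PL_MASK_MEM yields a placement naming two memory types at once. A standalone sketch of that discipline with illustrative bit values (the real definitions live in the TTM headers):

#include <assert.h>
#include <stdint.h>

/* illustrative stand-ins for the TTM placement bits */
#define PL_FLAG_SYSTEM	(1u << 0)
#define PL_FLAG_TT	(1u << 1)
#define PL_FLAG_VRAM	(1u << 2)
#define PL_MASK_MEM	(PL_FLAG_SYSTEM | PL_FLAG_TT | PL_FLAG_VRAM)
#define PL_FLAG_CACHED	(1u << 16)	/* caching bits must survive */

static uint32_t retarget_to_tt(uint32_t placement)
{
	/* clear the old memory type, keep everything else, set TT */
	return (placement & ~PL_MASK_MEM) | PL_FLAG_TT;
}

int main(void)
{
	uint32_t p = PL_FLAG_VRAM | PL_FLAG_CACHED;

	p = retarget_to_tt(p);
	assert(!(p & PL_FLAG_VRAM));	/* old memory type really gone */
	assert(p & PL_FLAG_TT);
	assert(p & PL_FLAG_CACHED);	/* caching attribute preserved */
	return 0;
}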
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -648,7 +648,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r, idx = 0; + int i, r = 0, idx = 0; p->job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c index 746b81339835..7f7097931c6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -31,7 +31,12 @@ void amdgpu_vf_error_put(struct amdgpu_device *adev, uint64_t error_data) { int index; - uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); + uint16_t error_code; + + if (!amdgpu_sriov_vf(adev)) + return; + + error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); mutex_lock(&adev->virt.vf_errors.lock); index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e97f80f86005..6738df836a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -114,18 +114,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r; + unsigned long flags; uint32_t val, seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); - spin_lock(&kiq->ring_lock); + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - spin_unlock(&kiq->ring_lock); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) { @@ -140,18 +141,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { signed long r; + unsigned long flags; uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); - spin_lock(&kiq->ring_lock); + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - spin_unlock(&kiq->ring_lock); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) @@ -328,9 +330,11 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) sizeof(amdgim_vf2pf_info)); AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, &str); +#ifdef MODULE if (THIS_MODULE->version != NULL) strcpy(str, THIS_MODULE->version); else +#endif strcpy(str, "N/A"); AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 010d14195a5e..c8c26f21993c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1244,7 +1244,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - int r; + int r = 0; spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 
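The amdgpu_virt.c hunks above move the KIQ register accessors from spin_lock() to spin_lock_irqsave(): a plain spin_lock held while an interrupt handler on the same CPU tries to take the same lock deadlocks. A minimal kernel-context sketch of the pattern, with the ring submission elided:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(ring_lock);

static void kiq_submit(void)
{
	unsigned long flags;

	/* save and disable local interrupts for the critical section so
	 * an IRQ on this CPU cannot re-enter and spin on ring_lock
	 */
	spin_lock_irqsave(&ring_lock, flags);
	/* ... ring alloc / emit / commit would go here ... */
	spin_unlock_irqrestore(&ring_lock, flags);
}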
dea7c909ca5f..4e20d91d5d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -257,6 +257,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; struct amdgpu_device *adev = psp->adev; uint32_t ring_size_dw = ring->ring_size / 4; uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; @@ -266,9 +269,16 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, /* Update KM RB frame pointer to new frame */ if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring->ring_mem; + write_frame = ring_buffer_start; else - write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } /* Initialize KM RB frame */ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index cee5c396b277..c7bcfe8e286c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -367,6 +367,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; struct amdgpu_device *adev = psp->adev; uint32_t ring_size_dw = ring->ring_size / 4; uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; @@ -378,9 +381,16 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, /* write_frame ptr increments by size of rb_frame in bytes */ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring->ring_mem; + write_frame = ring_buffer_start; else - write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } /* Initialize KM RB frame */ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 71299c67c517..2581543b35a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -565,11 +565,7 @@ static int uvd_v6_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + 
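psp_v10_0_cmd_submit() and psp_v3_1_cmd_submit() gain the same guard above: derive the first and last valid frame addresses from ring_mem and ring_size, then refuse a write pointer outside that range before memset()ing through it. A standalone model of the pointer arithmetic (the frame layout and sizes are illustrative):

#include <stdint.h>
#include <stdio.h>

struct frame { uint32_t data[16]; };	/* stand-in for psp_gfx_rb_frame */

static int check_write_frame(struct frame *ring_mem, uint32_t ring_size,
			     struct frame *write_frame)
{
	struct frame *start = ring_mem;
	struct frame *end = start + ring_size / sizeof(struct frame) - 1;

	if (write_frame < start || end < write_frame) {
		fprintf(stderr, "write_frame outside [%p, %p]\n",
			(void *)start, (void *)end);
		return -1;	/* the driver returns -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct frame ring[8];

	printf("in ring:  %d\n", check_write_frame(ring, sizeof(ring), &ring[3]));
	printf("past end: %d\n", check_write_frame(ring, sizeof(ring), &ring[8]));
	return 0;
}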
return amdgpu_uvd_suspend(adev); } static int uvd_v6_0_resume(void *handle) @@ -577,12 +573,10 @@ static int uvd_v6_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v6_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b8ed8faf2003..6634545060fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -592,11 +592,7 @@ static int uvd_v7_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v7_0_resume(void *handle) @@ -604,12 +600,10 @@ static int uvd_v7_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v7_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index e13c67c8d2c0..bc5a2945bd2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 211fc48697fa..3d5ccb3755d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, /* Do not process in ISR, just request it to be forwarded to WQ. 
*/ return (pasid != 0) && (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); } @@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + uint32_t context_id = ihre->data & 0xfffffff; pasid = (ihre->ring_id & 0xffff0000) >> 16; @@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, return; if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 79a16d24c1b8..109298b9d507 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -32,9 +32,10 @@ struct cik_ih_ring_entry { uint32_t reserved; }; -#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 +#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 +#define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0ef82b229754..505d39156acd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, return -EINVAL; } - mutex_lock(kfd_get_dbgmgr_mutex()); mutex_lock(&p->mutex); + mutex_lock(kfd_get_dbgmgr_mutex()); /* * make sure that we have pdd, if this the first queue created for @@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, } out: - mutex_unlock(&p->mutex); mutex_unlock(kfd_get_dbgmgr_mutex()); + mutex_unlock(&p->mutex); return status; } @@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, void *data) { struct kfd_ioctl_wait_events_args *args = data; - enum kfd_event_wait_result wait_result; int err; err = kfd_wait_on_events(p, args->num_events, (void __user *)args->events_ptr, (args->wait_for_all != 0), - args->timeout, &wait_result); - - args->wait_result = wait_result; + args->timeout, &args->wait_result); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 46049f005b02..621a3b53a038 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry) && enqueue_ih_ring_entry(kfd, ih_ring_entry)) - schedule_work(&kfd->interrupt_work); + queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index da3b74315acf..e202921c150e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ 
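The kfd_ioctl_dbg_register() hunk above swaps the acquisition order so p->mutex is always taken before the dbgmgr mutex and released in reverse; two paths taking the same pair of locks in opposite orders is the classic ABBA deadlock. A kernel-style sketch of the discipline, with hypothetical lock names standing in for the real ones:

#include <linux/mutex.h>

static DEFINE_MUTEX(process_mutex);	/* plays the role of p->mutex */
static DEFINE_MUTEX(dbgmgr_mutex);	/* plays kfd_get_dbgmgr_mutex() */

static void dbg_register_example(void)
{
	/* every path must acquire in this order ... */
	mutex_lock(&process_mutex);
	mutex_lock(&dbgmgr_mutex);

	/* ... critical section ... */

	/* ... and release in strict reverse order */
	mutex_unlock(&dbgmgr_mutex);
	mutex_unlock(&process_mutex);
}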
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - if (retval != 0) { + if (retval) { pr_err("unmap queue failed\n"); goto out_unlock; } - } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && - prev_active && + } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) { retval = mqd->destroy_mqd(mqd, q->mqd, @@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = mqd->update_mqd(mqd, q->mqd, &q->properties); - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) - retval = map_queues_cpsch(dqm); - else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && - q->properties.is_active && - (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, - &q->properties, q->process->mm); - /* - * check active state vs. the previous state - * and modify counter accordingly + * check active state vs. the previous state and modify + * counter accordingly. map_queues_cpsch uses the + * dqm->queue_count to determine whether a new runlist must be + * uploaded. */ if (q->properties.is_active && !prev_active) dqm->queue_count++; else if (!q->properties.is_active && prev_active) dqm->queue_count--; + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + retval = map_queues_cpsch(dqm); + else if (q->properties.is_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); + out_unlock: mutex_unlock(&dqm->lock); return retval; @@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); - retval = dqm->ops_asic_specific.register_process(dqm, qpd); + retval = dqm->asic_ops.update_qpd(dqm, qpd); dqm->processes_count++; @@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) @@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm) static int initialize_cpsch(struct device_queue_manager *dqm) { - int retval; - pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); mutex_init(&dqm->lock); @@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; - retval = dqm->ops_asic_specific.initialize(dqm); - if (retval) - mutex_destroy(&dqm->lock); - return retval; + return 0; } static int start_cpsch(struct device_queue_manager *dqm) @@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval != 0) + if (retval) goto out; q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; @@ -850,7 +845,7 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q, goto out; } - dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) @@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; } - retval = dqm->ops_asic_specific.set_cache_memory_policy( + retval = dqm->asic_ops.set_cache_memory_policy( dqm, qpd, default_policy, @@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) switch (dev->device_info->asic_family) { case CHIP_CARRIZO: - device_queue_manager_init_vi(&dqm->ops_asic_specific); + device_queue_manager_init_vi(&dqm->asic_ops); break; case CHIP_KAVERI: - device_queue_manager_init_cik(&dqm->ops_asic_specific); + device_queue_manager_init_cik(&dqm->asic_ops); break; default: WARN(1, "Unexpected ASIC family %u", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 31c2b1f9d320..5b77cb69f732 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -128,9 +128,8 @@ struct device_queue_manager_ops { }; struct device_queue_manager_asic_ops { - int (*register_process)(struct device_queue_manager *dqm, + int (*update_qpd)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); - int (*initialize)(struct device_queue_manager *dqm); bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops { struct device_queue_manager { struct device_queue_manager_ops ops; - struct device_queue_manager_asic_ops ops_asic_specific; + struct device_queue_manager_asic_ops asic_ops; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -179,8 +178,10 @@ struct device_queue_manager { bool active_runlist; }; -void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); -void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); +void device_queue_manager_init_cik( + struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 72c3cbabc0a7..28e48c90c596 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); -static int register_process_cik(struct device_queue_manager *dqm, +static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); -static int initialize_cpsch_cik(struct device_queue_manager *dqm); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) +void device_queue_manager_init_cik( + struct device_queue_manager_asic_ops 
*asic_ops) { - ops->set_cache_memory_policy = set_cache_memory_policy_cik; - ops->register_process = register_process_cik; - ops->initialize = initialize_cpsch_cik; - ops->init_sdma_vm = init_sdma_vm; + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; + asic_ops->update_qpd = update_qpd_cik; + asic_ops->init_sdma_vm = init_sdma_vm; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) @@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, return true; } -static int register_process_cik(struct device_queue_manager *dqm, +static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct kfd_process_device *pdd; @@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } - -static int initialize_cpsch_cik(struct device_queue_manager *dqm) -{ - return 0; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 40e9ddd096cd..2fbce57a2f21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); -static int register_process_vi(struct device_queue_manager *dqm, +static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); -static int initialize_cpsch_vi(struct device_queue_manager *dqm); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) +void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops) { - ops->set_cache_memory_policy = set_cache_memory_policy_vi; - ops->register_process = register_process_vi; - ops->initialize = initialize_cpsch_vi; - ops->init_sdma_vm = init_sdma_vm; + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; + asic_ops->update_qpd = update_qpd_vi; + asic_ops->init_sdma_vm = init_sdma_vm; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) @@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, return true; } -static int register_process_vi(struct device_queue_manager *dqm, +static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct kfd_process_device *pdd; @@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } - -static int initialize_cpsch_vi(struct device_queue_manager *dqm) -{ - return 0; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 944abfad39c1..cb92d4b72400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -24,8 +24,8 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include <linux/uaccess.h> -#include <linux/mm.h> #include <linux/mman.h> #include <linux/memory.h> #include "kfd_priv.h" @@ -33,185 +33,89 @@ #include <linux/device.h> /* - * A task can only be on a single wait_queue at a time, but we need to support - * waiting on multiple events (any/all). 
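The device_queue_manager changes around this point reduce the per-ASIC interface to a filled-in ops table: device_queue_manager_init_cik()/_vi() populate update_qpd, init_sdma_vm and set_cache_memory_policy, the do-nothing initialize() hook is deleted outright, and device_queue_manager_init() picks the table by asic_family. A reduced standalone sketch of that dispatch shape (the types are stand-ins for the kfd ones):

#include <stdio.h>

struct dqm;	/* opaque stand-ins for the kfd structures */
struct qpd;

struct asic_ops {
	int (*update_qpd)(struct dqm *dqm, struct qpd *qpd);
	void (*init_sdma_vm)(struct dqm *dqm, struct qpd *qpd);
};

static int update_qpd_cik(struct dqm *dqm, struct qpd *qpd)
{
	(void)dqm; (void)qpd;
	printf("CIK qpd update\n");
	return 0;
}

static void init_sdma_vm_cik(struct dqm *dqm, struct qpd *qpd)
{
	(void)dqm; (void)qpd;
}

static void init_cik(struct asic_ops *ops)
{
	ops->update_qpd = update_qpd_cik;
	ops->init_sdma_vm = init_sdma_vm_cik;
}

int main(void)
{
	struct asic_ops ops;

	init_cik(&ops);		/* selected by asic_family in the driver */
	return ops.update_qpd(NULL, NULL);
}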
- * Instead of each event simply having a wait_queue with sleeping tasks, it - * has a singly-linked list of tasks. - * A thread that wants to sleep creates an array of these, one for each event - * and adds one to each event's waiter chain. + * Wrapper around wait_queue_entry_t */ struct kfd_event_waiter { - struct list_head waiters; - struct task_struct *sleeping_task; - - /* Transitions to true when the event this belongs to is signaled. */ - bool activated; - - /* Event */ - struct kfd_event *event; - uint32_t input_index; + wait_queue_entry_t wait; + struct kfd_event *event; /* Event to wait for */ + bool activated; /* Becomes true when event is signaled */ }; /* - * Over-complicated pooled allocator for event notification slots. - * * Each signal event needs a 64-bit signal slot where the signaler will write - * a 1 before sending an interrupt.l (This is needed because some interrupts + * a 1 before sending an interrupt. (This is needed because some interrupts * do not contain enough spare data bits to identify an event.) - * We get whole pages from vmalloc and map them to the process VA. - * Individual signal events are then allocated a slot in a page. + * We get whole pages and map them to the process VA. + * Individual signal events use their event_id as slot index. */ - -struct signal_page { - struct list_head event_pages; /* kfd_process.signal_event_pages */ +struct kfd_signal_page { uint64_t *kernel_address; uint64_t __user *user_address; - uint32_t page_index; /* Index into the mmap aperture. */ - unsigned int free_slots; - unsigned long used_slot_bitmap[0]; }; -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT -#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) -#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) -#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ - SLOT_BITMAP_SIZE * sizeof(long)) - -/* - * For signal events, the event ID is used as the interrupt user data. - * For SQ s_sendmsg interrupts, this is limited to 8 bits. 
- */ - -#define INTERRUPT_DATA_BITS 8 -#define SIGNAL_EVENT_ID_SLOT_SHIFT 0 -static uint64_t *page_slots(struct signal_page *page) +static uint64_t *page_slots(struct kfd_signal_page *page) { return page->kernel_address; } -static bool allocate_free_slot(struct kfd_process *process, - struct signal_page **out_page, - unsigned int *out_slot_index) -{ - struct signal_page *page; - - list_for_each_entry(page, &process->signal_event_pages, event_pages) { - if (page->free_slots > 0) { - unsigned int slot = - find_first_zero_bit(page->used_slot_bitmap, - SLOTS_PER_PAGE); - - __set_bit(slot, page->used_slot_bitmap); - page->free_slots--; - - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; - - *out_page = page; - *out_slot_index = slot; - - pr_debug("Allocated event signal slot in page %p, slot %d\n", - page, slot); - - return true; - } - } - - pr_debug("No free event signal slots were found for process %p\n", - process); - - return false; -} - -#define list_tail_entry(head, type, member) \ - list_entry((head)->prev, type, member) - -static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) +static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) { void *backing_store; - struct signal_page *page; + struct kfd_signal_page *page; - page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); + page = kzalloc(sizeof(*page), GFP_KERNEL); if (!page) - goto fail_alloc_signal_page; + return NULL; - page->free_slots = SLOTS_PER_PAGE; - - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + backing_store = (void *) __get_free_pages(GFP_KERNEL, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) goto fail_alloc_signal_store; - /* prevent user-mode info leaks */ + /* Initialize all events to unsignaled */ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, - KFD_SIGNAL_EVENT_LIMIT * 8); + KFD_SIGNAL_EVENT_LIMIT * 8); page->kernel_address = backing_store; - - if (list_empty(&p->signal_event_pages)) - page->page_index = 0; - else - page->page_index = list_tail_entry(&p->signal_event_pages, - struct signal_page, - event_pages)->page_index + 1; - pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); - pr_debug("Page index is %d\n", page->page_index); - list_add(&page->event_pages, &p->signal_event_pages); - - return true; + return page; fail_alloc_signal_store: kfree(page); -fail_alloc_signal_page: - return false; + return NULL; } -static bool allocate_event_notification_slot(struct file *devkfd, - struct kfd_process *p, - struct signal_page **page, - unsigned int *signal_slot_index) +static int allocate_event_notification_slot(struct kfd_process *p, + struct kfd_event *ev) { - bool ret; + int id; - ret = allocate_free_slot(p, page, signal_slot_index); - if (!ret) { - ret = allocate_signal_page(devkfd, p); - if (ret) - ret = allocate_free_slot(p, page, signal_slot_index); + if (!p->signal_page) { + p->signal_page = allocate_signal_page(p); + if (!p->signal_page) + return -ENOMEM; + /* Oldest user mode expects 256 event slots */ + p->signal_mapped_size = 256*8; } - return ret; -} - -/* Assumes that the process's event_mutex is locked. */ -static void release_event_notification_slot(struct signal_page *page, - size_t slot_index) -{ - __clear_bit(slot_index, page->used_slot_bitmap); - page->free_slots++; - - /* We don't free signal pages, they are retained by the process - * and reused until it exits. 
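The new allocator being introduced here ties event IDs directly to signal-slot indices: the first signal event allocates the single per-process page, and idr_alloc() is bounded by signal_mapped_size / 8, so an event can only receive an ID whose slot user mode has actually mapped. A kernel-style sketch of that bounded allocation, reusing the names from this patch:

#include <linux/idr.h>
#include <linux/slab.h>

/* One 64-bit signal slot per event; the IDR ID doubles as the slot
 * index, so IDs are capped at the number of slots user space mapped.
 */
static int alloc_event_slot(struct idr *event_idr, void *ev,
			    unsigned int mapped_bytes)
{
	/* negative errno on failure, otherwise ID == slot index */
	return idr_alloc(event_idr, ev, 0, mapped_bytes / 8, GFP_KERNEL);
}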
- */ -} - -static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, - unsigned int page_index) -{ - struct signal_page *page; - /* - * This is safe because we don't delete signal pages until the - * process exits. + * Compatibility with old user mode: Only use signal slots + * user mode has mapped, may be less than + * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase + * of the event limit without breaking user mode. */ - list_for_each_entry(page, &p->signal_event_pages, event_pages) - if (page->page_index == page_index) - return page; + id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, + GFP_KERNEL); + if (id < 0) + return id; - return NULL; + ev->event_id = id; + page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; + + return 0; } /* @@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { - struct kfd_event *ev; - - hash_for_each_possible(p->events, ev, events, id) - if (ev->event_id == id) - return ev; - - return NULL; + return idr_find(&p->event_idr, id); } -static u32 make_signal_event_id(struct signal_page *page, - unsigned int signal_slot_index) -{ - return page->page_index | - (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); -} - -/* - * Produce a kfd event id for a nonsignal event. - * These are arbitrary numbers, so we do a sequential search through - * the hash table for an unused number. +/** + * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID + * @p: Pointer to struct kfd_process + * @id: ID to look up + * @bits: Number of valid bits in @id + * + * Finds the first signaled event with a matching partial ID. If no + * matching signaled event is found, returns NULL. In that case the + * caller should assume that the partial ID is invalid and do an + * exhaustive search of all siglaned events. + * + * If multiple events with the same partial ID signal at the same + * time, they will be found one interrupt at a time, not necessarily + * in the same order the interrupts occurred. As long as the number of + * interrupts is correct, all signaled events will be seen by the + * driver. */ -static u32 make_nonsignal_event_id(struct kfd_process *p) +static struct kfd_event *lookup_signaled_event_by_partial_id( + struct kfd_process *p, uint32_t id, uint32_t bits) { - u32 id; - - for (id = p->next_nonsignal_event_id; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; + struct kfd_event *ev; - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) + return NULL; - /* - * What if id == LAST_NONSIGNAL_EVENT_ID - 1? - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so - * the first loop fails immediately and we proceed with the - * wraparound loop below. - */ - p->next_nonsignal_event_id = id + 1; + /* Fast path for the common case that @id is not a partial ID + * and we only need a single lookup. + */ + if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + return NULL; - return id; + return idr_find(&p->event_idr, id); } - for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; - + /* General case for partial IDs: Iterate over all matching IDs + * and find the first one that has signaled. 
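lookup_signaled_event_by_partial_id() above treats the interrupt payload as the low `bits` bits of an event ID, so every candidate ID shares those low bits and the candidates differ by multiples of 1 << bits. A standalone model of that candidate walk:

#include <stdint.h>
#include <stdio.h>

#define EVENT_LIMIT 4096u	/* illustrative, plays KFD_SIGNAL_EVENT_LIMIT */

/* visit every ID whose low `bits` bits equal `partial` */
static void walk_candidates(uint32_t partial, uint32_t bits)
{
	uint32_t id;

	for (id = partial; id < EVENT_LIMIT; id += 1u << bits)
		printf("candidate id %u\n", id);
}

int main(void)
{
	walk_candidates(5, 8);	/* prints 5, 261, 517, ... */
	return 0;
}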
+ */ + for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + continue; - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { - p->next_nonsignal_event_id = id + 1; - return id; + ev = idr_find(&p->event_idr, id); } - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; - return 0; -} - -static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, - struct signal_page *page, - unsigned int signal_slot) -{ - return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); + return ev; } static int create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) { - if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { + int ret; + + if (p->signal_mapped_size && + p->signal_event_count == p->signal_mapped_size / 8) { if (!p->signal_event_limit_reached) { pr_warn("Signal event wasn't created because limit was reached\n"); p->signal_event_limit_reached = true; } - return -ENOMEM; + return -ENOSPC; } - if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, - &ev->signal_slot_index)) { + ret = allocate_event_notification_slot(p, ev); + if (ret) { pr_warn("Signal event wasn't created because out of kernel memory\n"); - return -ENOMEM; + return ret; } p->signal_event_count++; - ev->user_signal_address = - &ev->signal_page->user_address[ev->signal_slot_index]; - - ev->event_id = make_signal_event_id(ev->signal_page, - ev->signal_slot_index); - + ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; pr_debug("Signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, ev->user_signal_address); @@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd, return 0; } -/* - * No non-signal events are supported yet. - * We create them as events that never signal. - * Set event calls from user-mode are failed. - */ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) { - ev->event_id = make_nonsignal_event_id(p); - if (ev->event_id == 0) - return -ENOMEM; + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an + * intentional integer overflow to -1 without a compiler + * warning. idr_alloc treats a negative value as "maximum + * signed integer". + */ + int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, + GFP_KERNEL); + + if (id < 0) + return id; + ev->event_id = id; return 0; } @@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) void kfd_event_init_process(struct kfd_process *p) { mutex_init(&p->event_mutex); - hash_init(p->events); - INIT_LIST_HEAD(&p->signal_event_pages); - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_init(&p->event_idr); + p->signal_page = NULL; p->signal_event_count = 0; } static void destroy_event(struct kfd_process *p, struct kfd_event *ev) { - if (ev->signal_page) { - release_event_notification_slot(ev->signal_page, - ev->signal_slot_index); - p->signal_event_count--; - } + struct kfd_event_waiter *waiter; - /* - * Abandon the list of waiters. Individual waiting threads will - * clean up their own data. - */ - list_del(&ev->waiters); + /* Wake up pending waiters. 
They will return failure */ + list_for_each_entry(waiter, &ev->wq.head, wait.entry) + waiter->event = NULL; + wake_up_all(&ev->wq); + + if (ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG) + p->signal_event_count--; - hash_del(&ev->events); + idr_remove(&p->event_idr, ev->event_id); kfree(ev); } static void destroy_events(struct kfd_process *p) { struct kfd_event *ev; - struct hlist_node *tmp; - unsigned int hash_bkt; + uint32_t id; - hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) + idr_for_each_entry(&p->event_idr, ev, id) destroy_event(p, ev); + idr_destroy(&p->event_idr); } /* * We assume that the process is being destroyed and there is no need to * unmap the pages or keep bookkeeping data in order. */ -static void shutdown_signal_pages(struct kfd_process *p) +static void shutdown_signal_page(struct kfd_process *p) { - struct signal_page *page, *tmp; + struct kfd_signal_page *page = p->signal_page; - list_for_each_entry_safe(page, tmp, &p->signal_event_pages, - event_pages) { + if (page) { free_pages((unsigned long)page->kernel_address, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); kfree(page); @@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p) void kfd_event_free_process(struct kfd_process *p) { destroy_events(p); - shutdown_signal_pages(p); + shutdown_signal_page(p); } static bool event_can_be_gpu_signaled(const struct kfd_event *ev) @@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ev->auto_reset = auto_reset; ev->signaled = false; - INIT_LIST_HEAD(&ev->waiters); + init_waitqueue_head(&ev->wq); *event_page_offset = 0; @@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = (ev->signal_page->page_index | - KFD_MMAP_EVENTS_MASK); + *event_page_offset = KFD_MMAP_EVENTS_MASK; *event_page_offset <<= PAGE_SHIFT; - *event_slot_index = ev->signal_slot_index; + *event_slot_index = ev->event_id; } break; default: @@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, } if (!ret) { - hash_add(p->events, &ev->events, ev->event_id); - *event_id = ev->event_id; *event_trigger_data = ev->event_id; } else { @@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) static void set_event(struct kfd_event *ev) { struct kfd_event_waiter *waiter; - struct kfd_event_waiter *next; - /* Auto reset if the list is non-empty and we're waking someone. */ - ev->signaled = !ev->auto_reset || list_empty(&ev->waiters); + /* Auto reset if the list is non-empty and we're waking + * someone. waitqueue_active is safe here because we're + * protected by the p->event_mutex, which is also held when + * updating the wait queues in kfd_wait_on_events. + */ + ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); - list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) { + list_for_each_entry(waiter, &ev->wq.head, wait.entry) waiter->activated = true; - /* _init because free_waiters will call list_del */ - list_del_init(&waiter->waiters); - - wake_up_process(waiter->sleeping_task); - } + wake_up_all(&ev->wq); } /* Assumes that p is current. 
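The set_event() rework above derives the post-signal state from waitqueue_active(): an auto-reset event stays signaled only when nobody is waiting, because waking a waiter consumes the signal; as the added comment notes, waitqueue_active() is only safe here because both sides hold p->event_mutex. A compact kernel-style sketch of the rule:

#include <linux/types.h>
#include <linux/wait.h>

struct event {
	bool signaled;
	bool auto_reset;
	wait_queue_head_t wq;	/* waiters park here */
};

/* Caller is assumed to hold the mutex that also guards waiter setup,
 * which is what makes waitqueue_active() race-free in this sketch.
 */
static void event_set(struct event *ev)
{
	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
	wake_up_all(&ev->wq);
}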
*/ @@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(ev->signal_page)[ev->signal_slot_index] = - UNSIGNALED_EVENT_SLOT; -} - -static bool is_slot_signaled(struct signal_page *page, unsigned int index) -{ - return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; + page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; } static void set_event_from_interrupt(struct kfd_process *p, @@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits) { - struct kfd_event *ev; + struct kfd_event *ev = NULL; /* * Because we are called from arbitrary context (workqueue) as opposed @@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, mutex_lock(&p->event_mutex); - if (valid_id_bits >= INTERRUPT_DATA_BITS) { - /* Partial ID is a full ID. */ - ev = lookup_event_by_id(p, partial_id); + if (valid_id_bits) + ev = lookup_signaled_event_by_partial_id(p, partial_id, + valid_id_bits); + if (ev) { set_event_from_interrupt(p, ev); - } else { + } else if (p->signal_page) { /* - * Partial ID is in fact partial. For now we completely - * ignore it, but we could use any bits we did receive to - * search faster. + * Partial ID lookup failed. Assume that the event ID + * in the interrupt payload was invalid and do an + * exhaustive search of signaled events. */ - struct signal_page *page; - unsigned int i; - - list_for_each_entry(page, &p->signal_event_pages, event_pages) - for (i = 0; i < SLOTS_PER_PAGE; i++) - if (is_slot_signaled(page, i)) { - ev = lookup_event_by_page_slot(p, - page, i); + uint64_t *slots = page_slots(p->signal_page); + uint32_t id; + + if (valid_id_bits) + pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", + partial_id, valid_id_bits); + + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + /* With relatively few events, it's faster to + * iterate over the event IDR + */ + idr_for_each_entry(&p->event_idr, ev, id) { + if (id >= KFD_SIGNAL_EVENT_LIMIT) + break; + + if (slots[id] != UNSIGNALED_EVENT_SLOT) + set_event_from_interrupt(p, ev); + } + } else { + /* With relatively many events, it's faster to + * iterate over the signal slots and lookup + * only signaled events from the IDR. 
+ */ + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) + if (slots[id] != UNSIGNALED_EVENT_SLOT) { + ev = lookup_event_by_id(p, id); set_event_from_interrupt(p, ev); } + } } mutex_unlock(&p->event_mutex); @@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) GFP_KERNEL); for (i = 0; (event_waiters) && (i < num_events) ; i++) { - INIT_LIST_HEAD(&event_waiters[i].waiters); - event_waiters[i].sleeping_task = current; + init_wait(&event_waiters[i].wait); event_waiters[i].activated = false; } return event_waiters; } -static int init_event_waiter(struct kfd_process *p, +static int init_event_waiter_get_status(struct kfd_process *p, struct kfd_event_waiter *waiter, - uint32_t event_id, - uint32_t input_index) + uint32_t event_id) { struct kfd_event *ev = lookup_event_by_id(p, event_id); @@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p, return -EINVAL; waiter->event = ev; - waiter->input_index = input_index; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; - list_add(&waiter->waiters, &ev->waiters); - return 0; } -static bool test_event_condition(bool all, uint32_t num_events, +static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) +{ + struct kfd_event *ev = waiter->event; + + /* Only add to the wait list if we actually need to + * wait on this event. + */ + if (!waiter->activated) + add_wait_queue(&ev->wq, &waiter->wait); +} + +/* test_event_condition - Test condition of events being waited for + * @all: Return completion only if all events have signaled + * @num_events: Number of events to wait for + * @event_waiters: Array of event waiters, one per event + * + * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have + * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all) + * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of + * the events have been destroyed. + */ +static uint32_t test_event_condition(bool all, uint32_t num_events, struct kfd_event_waiter *event_waiters) { uint32_t i; uint32_t activated_count = 0; for (i = 0; i < num_events; i++) { + if (!event_waiters[i].event) + return KFD_IOC_WAIT_RESULT_FAIL; + if (event_waiters[i].activated) { if (!all) - return true; + return KFD_IOC_WAIT_RESULT_COMPLETE; activated_count++; } } - return activated_count == num_events; + return activated_count == num_events ? + KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT; } /* * Copy event specific data, if defined. 
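copy_to_user() returns the number of bytes it could not copy, and the rewritten copy_signaled_event_data() below maps any nonzero remainder to -EFAULT instead of the old bool. A stand-alone illustration of that convention (struct foo is a placeholder payload):

struct foo {
	u32 a;
};

static int copy_out(struct foo __user *dst, const struct foo *src)
{
	/* copy_to_user() returns the number of bytes NOT copied */
	if (copy_to_user(dst, src, sizeof(*src)))
		return -EFAULT;

	return 0;
}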
* Currently only memory exception events have additional data to copy to user */ -static bool copy_signaled_event_data(uint32_t num_events, +static int copy_signaled_event_data(uint32_t num_events, struct kfd_event_waiter *event_waiters, struct kfd_event_data __user *data) { @@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events, waiter = &event_waiters[i]; event = waiter->event; if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { - dst = &data[waiter->input_index].memory_exception_data; + dst = &data[i].memory_exception_data; src = &event->memory_exception_data; if (copy_to_user(dst, src, sizeof(struct kfd_hsa_memory_exception_data))) - return false; + return -EFAULT; } } - return true; + return 0; } @@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) uint32_t i; for (i = 0; i < num_events; i++) - list_del(&waiters[i].waiters); + if (waiters[i].event) + remove_wait_queue(&waiters[i].event->wq, + &waiters[i].wait); kfree(waiters); } @@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, - enum kfd_event_wait_result *wait_result) + uint32_t *wait_result) { struct kfd_event_data __user *events = (struct kfd_event_data __user *) data; uint32_t i; int ret = 0; + struct kfd_event_waiter *event_waiters = NULL; long timeout = user_timeout_to_jiffies(user_timeout_ms); - mutex_lock(&p->event_mutex); - event_waiters = alloc_event_waiters(num_events); if (!event_waiters) { ret = -ENOMEM; - goto fail; + goto out; } + mutex_lock(&p->event_mutex); + for (i = 0; i < num_events; i++) { struct kfd_event_data event_data; if (copy_from_user(&event_data, &events[i], sizeof(struct kfd_event_data))) { ret = -EFAULT; - goto fail; + goto out_unlock; } - ret = init_event_waiter(p, &event_waiters[i], - event_data.event_id, i); + ret = init_event_waiter_get_status(p, &event_waiters[i], + event_data.event_id); if (ret) - goto fail; + goto out_unlock; } + /* Check condition once. */ + *wait_result = test_event_condition(all, num_events, event_waiters); + if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) { + ret = copy_signaled_event_data(num_events, + event_waiters, events); + goto out_unlock; + } else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) { + /* This should not happen. Events shouldn't be + * destroyed while we're holding the event_mutex + */ + goto out_unlock; + } + + /* Add to wait lists if we need to wait. */ + for (i = 0; i < num_events; i++) + init_event_waiter_add_to_waitlist(&event_waiters[i]); + mutex_unlock(&p->event_mutex); while (true) { @@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p, break; } - if (test_event_condition(all, num_events, event_waiters)) { - if (copy_signaled_event_data(num_events, - event_waiters, events)) - *wait_result = KFD_WAIT_COMPLETE; - else - *wait_result = KFD_WAIT_ERROR; + /* Set task state to interruptible sleep before + * checking wake-up conditions. A concurrent wake-up + * will put the task back into runnable state. In that + * case schedule_timeout will not put the task to + * sleep and we'll get a chance to re-check the + * updated conditions almost immediately. Otherwise, + * this race condition would lead to a soft hang or a + * very long sleep. 
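The comment above describes the canonical open-coded wait loop: the task state must be changed before the wake-up condition is re-checked, so a wake-up racing with the check merely turns schedule_timeout() into a near no-op instead of being lost. The bare pattern, with condition_met(), timeout_ms and ret as placeholders:

long timeout = msecs_to_jiffies(timeout_ms);
int ret = 0;

for (;;) {
	set_current_state(TASK_INTERRUPTIBLE);

	if (condition_met())		/* re-check *after* setting state */
		break;

	if (signal_pending(current)) {
		ret = -ERESTARTSYS;	/* let the signal be delivered */
		break;
	}

	if (timeout <= 0)
		break;			/* timed out */

	timeout = schedule_timeout(timeout);
}
__set_current_state(TASK_RUNNING);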
+ */ + set_current_state(TASK_INTERRUPTIBLE); + + *wait_result = test_event_condition(all, num_events, + event_waiters); + if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT) break; - } - if (timeout <= 0) { - *wait_result = KFD_WAIT_TIMEOUT; + if (timeout <= 0) break; - } - timeout = schedule_timeout_interruptible(timeout); + timeout = schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); + /* copy_signaled_event_data may sleep. So this has to happen + * after the task state is set back to RUNNING. + */ + if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) + ret = copy_signaled_event_data(num_events, + event_waiters, events); + mutex_lock(&p->event_mutex); +out_unlock: free_waiters(num_events, event_waiters); mutex_unlock(&p->event_mutex); - - return ret; - -fail: - if (event_waiters) - free_waiters(num_events, event_waiters); - - mutex_unlock(&p->event_mutex); - - *wait_result = KFD_WAIT_ERROR; +out: + if (ret) + *wait_result = KFD_IOC_WAIT_RESULT_FAIL; + else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL) + ret = -EIO; return ret; } int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) { - - unsigned int page_index; unsigned long pfn; - struct signal_page *page; + struct kfd_signal_page *page; + int ret; - /* check required size is logical */ - if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != + /* check required size doesn't exceed the allocated size */ + if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) < get_order(vma->vm_end - vma->vm_start)) { pr_err("Event page mmap requested illegal size\n"); return -EINVAL; } - page_index = vma->vm_pgoff; - - page = lookup_signal_page_by_index(p, page_index); + page = p->signal_page; if (!page) { /* Probably KFD bug, but mmap is user-accessible. */ - pr_debug("Signal page could not be found for page_index %u\n", - page_index); + pr_debug("Signal page could not be found\n"); return -EINVAL; } @@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) page->user_address = (uint64_t __user *)vma->vm_start; /* mapping the page to user process */ - return remap_pfn_range(vma, vma->vm_start, pfn, + ret = remap_pfn_range(vma, vma->vm_start, pfn, vma->vm_end - vma->vm_start, vma->vm_page_prot); + if (!ret) + p->signal_mapped_size = vma->vm_end - vma->vm_start; + + return ret; } /* @@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, { struct kfd_hsa_memory_exception_data *ev_data; struct kfd_event *ev; - int bkt; + uint32_t id; bool send_signal = true; ev_data = (struct kfd_hsa_memory_exception_data *) event_data; - hash_for_each(p->events, bkt, ev, events) + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == type) { send_signal = false; dev_dbg(kfd_device, @@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, * running so the lookup function returns a locked process. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct mm_struct *mm; if (!p) return; /* Presumably process exited. */ + /* Take a safe reference to the mm_struct, which may otherwise + * disappear even while the kfd_process is still referenced. 
+ */ + mm = get_task_mm(p->lead_thread); + if (!mm) { + mutex_unlock(&p->mutex); + return; /* Process is exiting */ + } + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); - down_read(&p->mm->mmap_sem); - vma = find_vma(p->mm, address); + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); memory_exception_data.gpu_id = dev->id; memory_exception_data.va = address; @@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, } } - up_read(&p->mm->mmap_sem); + up_read(&mm->mmap_sem); + mmput(mm); mutex_lock(&p->event_mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 28f6838b1f4c..abca5bfebbff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -27,12 +27,17 @@ #include <linux/hashtable.h> #include <linux/types.h> #include <linux/list.h> +#include <linux/wait.h> #include "kfd_priv.h" #include <uapi/linux/kfd_ioctl.h> -#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U -#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK -#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX +/* + * IDR supports non-negative integer IDs. Small IDs are used for + * signal events to match their signal slot. Use the upper half of the + * ID space for non-signal events. + */ +#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) +#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX /* * Written into kfd_signal_slot_t to indicate that the event is not signaled. @@ -46,9 +51,6 @@ struct kfd_event_waiter; struct signal_page; struct kfd_event { - /* All events in process, rooted at kfd_process.events. */ - struct hlist_node events; - u32 event_id; bool signaled; @@ -56,11 +58,9 @@ struct kfd_event { int type; - struct list_head waiters; /* List of kfd_event_waiter by waiters. */ + wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. 
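With the ID space split this way, allocation reduces to handing idr_alloc() different [start, end) windows; for signal events the ID doubles as the index into the signal page. A sketch under that assumption (the driver's actual allocation sites are not part of this hunk):

int id;

if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
    ev->type == KFD_EVENT_TYPE_DEBUG)
	/* ID == signal slot index, so it must stay below the slot count */
	id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
		       GFP_KERNEL);
else
	/* upper half of the int range; end == 0 means "no upper limit" */
	id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, 0,
		       GFP_KERNEL);

if (id < 0)
	return id;	/* -ENOMEM, or -ENOSPC when the window is full */

ev->event_id = id;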
*/ - struct signal_page *signal_page; - unsigned int signal_slot_index; uint64_t __user *user_signal_address; /* type specific data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 70b3a99cffc2..035c351f47c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -42,26 +42,26 @@ #include <linux/slab.h> #include <linux/device.h> +#include <linux/kfifo.h> #include "kfd_priv.h" -#define KFD_INTERRUPT_RING_SIZE 1024 +#define KFD_IH_NUM_ENTRIES 8192 static void interrupt_wq(struct work_struct *); int kfd_interrupt_init(struct kfd_dev *kfd) { - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, - kfd->device_info->ih_ring_entry_size, - GFP_KERNEL); - if (!interrupt_ring) - return -ENOMEM; - - kfd->interrupt_ring = interrupt_ring; - kfd->interrupt_ring_size = - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; - atomic_set(&kfd->interrupt_ring_wptr, 0); - atomic_set(&kfd->interrupt_ring_rptr, 0); + int r; + + r = kfifo_alloc(&kfd->ih_fifo, + KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, + GFP_KERNEL); + if (r) { + dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); + return r; + } + kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); spin_lock_init(&kfd->interrupt_lock); INIT_WORK(&kfd->interrupt_work, interrupt_wq); @@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd) spin_unlock_irqrestore(&kfd->interrupt_lock, flags); /* - * Flush_scheduled_work ensures that there are no outstanding + * flush_workqueue ensures that there are no outstanding * work-queue items that will access interrupt_ring. New work items * can't be created because we stopped interrupt handling above. */ - flush_scheduled_work(); + flush_workqueue(kfd->ih_wq); - kfree(kfd->interrupt_ring); + kfifo_free(&kfd->ih_fifo); } /* - * This assumes that it can't be called concurrently with itself - * but only with dequeue_ih_ring_entry. + * Assumption: single reader/writer. This function is not re-entrant */ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) { - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + int count; - if ((rptr - wptr) % kfd->interrupt_ring_size == - kfd->device_info->ih_ring_entry_size) { - /* This is very bad, the system is likely to hang. */ + count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); + if (count != kfd->device_info->ih_ring_entry_size) { dev_err_ratelimited(kfd_chardev(), - "Interrupt ring overflow, dropping interrupt.\n"); + "Interrupt ring overflow, dropping interrupt %d\n", + count); return false; } - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - - wptr = (wptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ - atomic_set(&kfd->interrupt_ring_wptr, wptr); - return true; } /* - * This assumes that it can't be called concurrently with itself - * but only with enqueue_ih_ring_entry. + * Assumption: single reader/writer. This function is not re-entrant */ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) { - /* - * Assume that wait queues have an implicit barrier, i.e. anything that - * happened in the ISR before it queued work is visible.
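kfifo preserves exactly the guarantees the hand-rolled ring relied on: with one producer and one consumer it is lock-free and performs the needed memory barriers internally, which is why the atomics and the smp_wmb()/smp_mb() pairs above can simply be deleted. A self-contained sketch of the byte-oriented kfifo calls used here, with ENTRY_SIZE standing in for ih_ring_entry_size:

#include <linux/kfifo.h>

#define ENTRY_SIZE	16	/* placeholder for ih_ring_entry_size */

static struct kfifo fifo;

static int ring_init(void)
{
	/* kfifo_alloc() rounds the size up to a power of two */
	return kfifo_alloc(&fifo, 8192 * ENTRY_SIZE, GFP_KERNEL);
}

static bool ring_put(const void *entry)	/* single producer (ISR side) */
{
	return kfifo_in(&fifo, entry, ENTRY_SIZE) == ENTRY_SIZE;
}

static bool ring_get(void *entry)	/* single consumer (worker side) */
{
	return kfifo_out(&fifo, entry, ENTRY_SIZE) == ENTRY_SIZE;
}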
- */ - - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + int count; - if (rptr == wptr) - return false; - - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, - kfd->device_info->ih_ring_entry_size); - - rptr = (rptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; + count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); - /* - * Ensure the rptr write update is not visible until - * memcpy has finished reading. - */ - smp_mb(); - atomic_set(&kfd->interrupt_ring_rptr, rptr); + WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); - return true; + return count == kfd->device_info->ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 44ffd23348fc..4859d263fa2a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - q->is_active = true; - } + q->queue_percent > 0); return 0; } @@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->sdma_rlc_doorbell = q->doorbell_off << - SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | - 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT; + m->sdma_rlc_doorbell = + q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; m->sdma_rlc_virtual_addr = q->sdma_vm_addr; m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - m->sdma_rlc_rb_cntl |= - 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT; - - q->is_active = true; - } + q->queue_percent > 0); return 0; } @@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); m->cp_hqd_vmid = q->vmid; - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } + q->queue_percent > 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 73cbfe186dd2..4ea854f9007b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - 
q->queue_percent > 0) { - q->is_active = true; - } + q->queue_percent > 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7d86ec9790d3..9e4134c5b481 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -31,6 +31,8 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/kfd_ioctl.h> +#include <linux/idr.h> +#include <linux/kfifo.h> #include <kgd_kfd_interface.h> #include "amd_shared.h" @@ -181,10 +183,8 @@ struct kfd_dev { unsigned int gtt_sa_num_of_chunks; /* Interrupts */ - void *interrupt_ring; - size_t interrupt_ring_size; - atomic_t interrupt_ring_rptr; - atomic_t interrupt_ring_wptr; + struct kfifo ih_fifo; + struct workqueue_struct *ih_wq; struct work_struct interrupt_work; spinlock_t interrupt_lock; @@ -494,7 +494,12 @@ struct kfd_process { */ struct hlist_node kfd_processes; - struct mm_struct *mm; + /* + * Opaque pointer to mm_struct. We don't hold a reference to + * it so it should never be dereferenced from here. This is + * only used for looking up processes by their mm. + */ + void *mm; struct mutex mutex; @@ -502,6 +507,8 @@ struct kfd_process { * In any process, the thread that started main() is the lead * thread and outlives the rest. * It is here because amd_iommu_bind_pasid wants a task_struct. + * It can also be used for safely getting a reference to the + * mm_struct of the process. */ struct task_struct *lead_thread; @@ -522,22 +529,16 @@ struct kfd_process { struct process_queue_manager pqm; - /* The process's queues. */ - size_t queue_array_size; - - /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ - struct kfd_queue **queues; - /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; /* Event-related data */ struct mutex event_mutex; - /* All events in process hashed by ID, linked on kfd_event.events. */ - DECLARE_HASHTABLE(events, 4); - /* struct slot_page_header.event_pages */ - struct list_head signal_event_pages; - u32 next_nonsignal_event_id; + /* Event ID allocator and lookup */ + struct idr event_idr; + /* Event page */ + struct kfd_signal_page *signal_page; + size_t signal_mapped_size; size_t signal_event_count; bool signal_event_limit_reached; }; @@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); extern const struct kfd_event_interrupt_class event_interrupt_class_cik; extern const struct kfd_device_global_init_class device_global_init_class_cik; -enum kfd_event_wait_result { - KFD_WAIT_COMPLETE, - KFD_WAIT_TIMEOUT, - KFD_WAIT_ERROR -}; - void kfd_event_init_process(struct kfd_process *p); void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, - enum kfd_event_wait_result *wait_result); + uint32_t *wait_result); void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); void kfd_signal_iommu_event(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3ccb3b53216e..1f5ccd28bd41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,13 +35,6 @@ struct mm_struct; #include "kfd_dbgmgr.h" /* - * Initial size for the array of queues. - * The allocated size is doubled each time - * it is exceeded up to MAX_PROCESS_QUEUES. 
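The new comments on kfd_process.mm above encode a rule: the pointer is an opaque key that may be compared but never dereferenced, and anyone who needs the real mm must take a reference through the lead thread. A sketch of the lookup this still permits, modeled on the kfd_processes hashtable (indexed by mm_struct*, per the kfd_process.c header just below); the helper name is hypothetical:

/* caller holds rcu_read_lock() or the matching SRCU read lock */
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	hash_for_each_possible_rcu(kfd_processes_table, p,
				   kfd_processes, (uintptr_t)mm)
		if (p->mm == mm)	/* pointer comparison only */
			return p;

	return NULL;
}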
- */ -#define INITIAL_QUEUE_ARRAY_SIZE 16 - -/* * List of struct kfd_process (field kfd_process). * Unique/indexed by mm_struct* */ @@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work) mutex_destroy(&p->mutex); - kfree(p->queues); - kfree(p); kfree(work); @@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) struct kfd_process *p; p = container_of(rcu, struct kfd_process, rcu); - WARN_ON(atomic_read(&p->mm->mm_count) <= 0); mmdrop(p->mm); @@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); + /* Iterate over all process device data structures and if the + * pdd is in debug mode, we should first force unregistration, + * then we will be able to destroy the queues + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + struct kfd_dev *dev = pdd->dev; + + mutex_lock(kfd_get_dbgmgr_mutex()); + if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + } + mutex_unlock(kfd_get_dbgmgr_mutex()); + } + kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); - /* Iterate over all process device data structure and check - * if we should delete debug managers - */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) - if ((pdd->dev->dbgmgr) && - (pdd->dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(pdd->dev->dbgmgr); - mutex_unlock(&p->mutex); /* @@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (!process) goto err_alloc_process; - process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, - sizeof(process->queues[0]), GFP_KERNEL); - if (!process->queues) - goto err_alloc_queues; - process->pasid = kfd_pasid_alloc(); if (process->pasid == 0) goto err_alloc_pasid; @@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->lead_thread = thread->group_leader; - process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; - INIT_LIST_HEAD(&process->per_device_data); kfd_event_init_process(process); @@ -328,8 +320,6 @@ err_mmu_notifier: err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: - kfree(process->queues); -err_alloc_queues: kfree(process); err_alloc_process: return ERR_PTR(err); @@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); if (err < 0) { - pr_err("unexpected pasid %d binding failure\n", + pr_err("Unexpected pasid %d binding failure\n", p->pasid); mutex_unlock(&p->mutex); break; @@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) } /* - * Temporarily unbind currently bound processes from the device and - * mark them as PDD_BOUND_SUSPENDED. These processes will be restored - * to PDD_BOUND state in kfd_bind_processes_to_device. + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These + * processes will be restored to PDD_BOUND state in + * kfd_bind_processes_to_device. 
*/ void kfd_unbind_processes_from_device(struct kfd_dev *dev) { struct kfd_process_device *pdd; struct kfd_process *p; - unsigned int temp, temp_bound, temp_pasid; + unsigned int temp; int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { mutex_lock(&p->mutex); pdd = kfd_get_process_device_data(dev, p); - temp_bound = pdd->bound; - temp_pasid = p->pasid; + if (pdd->bound == PDD_BOUND) pdd->bound = PDD_BOUND_SUSPENDED; mutex_unlock(&p->mutex); - - if (temp_bound == PDD_BOUND) - amd_iommu_unbind_pasid(dev->pdev, temp_pasid); } srcu_read_unlock(&kfd_processes_srcu, idx); @@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) pr_debug("Unbinding process %d from IOMMU\n", pasid); - if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(dev->dbgmgr); + mutex_lock(kfd_get_dbgmgr_mutex()); + + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); pdd = kfd_get_process_device_data(dev, p); if (pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 5129dc139219..2bec902fc939 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (retval != 0) return retval; - if (list_empty(&pqm->queues)) { + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) { pdd->qpd.pqm = pqm; dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } @@ -248,7 +249,8 @@ err_create_queue: err_allocate_pqn: /* check if queues list is empty unregister process from device */ clear_bit(*qid, pqm->queue_slot_bitmap); - if (list_empty(&pqm->queues)) + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); return retval; } @@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) kfree(pqn); clear_bit(qid, pqm->queue_slot_bitmap); - if (list_empty(&pqm->queues)) + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) dqm->ops.unregister_process(dqm, &pdd->qpd); return retval; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index de6fc2731b98..b72f8a43d86b 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -23,36 +23,11 @@ #ifndef __AMD_SHARED_H__ #define __AMD_SHARED_H__ -#define AMD_MAX_USEC_TIMEOUT 200000 /* 200 ms */ +#include <drm/amd_asic_type.h> struct seq_file; -/* - * Supported ASIC types - */ -enum amd_asic_type { - CHIP_TAHITI = 0, - CHIP_PITCAIRN, - CHIP_VERDE, - CHIP_OLAND, - CHIP_HAINAN, - CHIP_BONAIRE, - CHIP_KAVERI, - CHIP_KABINI, - CHIP_HAWAII, - CHIP_MULLINS, - CHIP_TOPAZ, - CHIP_TONGA, - CHIP_FIJI, - CHIP_CARRIZO, - CHIP_STONEY, - CHIP_POLARIS10, - CHIP_POLARIS11, - CHIP_POLARIS12, - CHIP_VEGA10, - CHIP_RAVEN, - CHIP_LAST, -}; +#define AMD_MAX_USEC_TIMEOUT 200000 /* 200 ms */ /* * Chip flags diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 3c8ef4bfc205..c7e34128cbde 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ 
-78,6 +78,9 @@ static int amd_powerplay_destroy(void *handle) { struct pp_instance *instance = (struct pp_instance *)handle; + kfree(instance->hwmgr->hardcode_pp_table); + instance->hwmgr->hardcode_pp_table = NULL; + kfree(instance->hwmgr); instance->hwmgr = NULL; @@ -1184,7 +1187,7 @@ int amd_powerplay_reset(void *handle) int ret; ret = pp_check(instance); - if (!ret) + if (ret) return ret; ret = pp_hw_fini(instance); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 938010842c7d..3e0b267c74a8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -672,36 +672,20 @@ static int rv_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_p PHM_PerformanceLevelDesignation designation, uint32_t index, PHM_PerformanceLevel *level) { - const struct rv_power_state *ps; struct rv_hwmgr *data; - uint32_t level_index; - uint32_t i; - uint32_t vol_dep_record_index = 0; if (level == NULL || hwmgr == NULL || state == NULL) return -EINVAL; data = (struct rv_hwmgr *)(hwmgr->backend); - ps = cast_const_rv_ps(state); - - level_index = index > ps->level - 1 ? ps->level - 1 : index; - level->coreClock = 30000; - if (designation == PHM_PerformanceLevelDesignation_PowerContainment) { - for (i = 1; i < ps->level; i++) { - if (ps->levels[i].engine_clock > data->dce_slow_sclk_threshold) { - level->coreClock = 30000; - break; - } - } - } - - if (level_index == 0) { - vol_dep_record_index = data->clock_vol_info.vdd_dep_on_fclk->count - 1; - level->memory_clock = - data->clock_vol_info.vdd_dep_on_fclk->entries[vol_dep_record_index].clk; - } else { + if (index == 0) { level->memory_clock = data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk; + level->coreClock = data->gfx_min_freq_limit; + } else { + level->memory_clock = data->clock_vol_info.vdd_dep_on_fclk->entries[ + data->clock_vol_info.vdd_dep_on_fclk->count - 1].clk; + level->coreClock = data->gfx_max_freq_limit; } level->nonLocalMemoryFreq = 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index e32f18a99074..4466469cf8ab 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -815,7 +815,7 @@ uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr) { uint32_t reference_clock, tmp; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; @@ -3948,10 +3948,9 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) uint32_t ref_clock; uint32_t refresh_rate = 0; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; - cgs_get_active_displays_info(hwmgr->device, &info); num_active_displays = info.display_count; @@ -3967,6 +3966,7 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 0519338e0e5e..4f79c21f27ed 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1807,6 +1807,10 @@ static int 
vega10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) mem_channels = (cgs_read_register(hwmgr->device, reg) & DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >> DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + PP_ASSERT_WITH_CODE(mem_channels < ARRAY_SIZE(channel_number), + "Mem Channel Index Exceeded maximum!", + return -1); + pp_table->NumMemoryChannels = cpu_to_le16(mem_channels); pp_table->MemoryChannelWidth = cpu_to_le16(HBM_MEMORY_CHANNEL_WIDTH * @@ -2879,6 +2883,15 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "DPM is already running right now, skipping re-enablement!", return 0); + if ((data->smu_version == 0x001c2c00) || + (data->smu_version == 0x001c2d00)) { + tmp_result = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UpdatePkgPwrPidAlpha, 1); + PP_ASSERT_WITH_CODE(!tmp_result, + "Failed to set package power PID!", + return tmp_result); + } + tmp_result = vega10_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to construct voltage tables!", @@ -3125,6 +3138,8 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; if (PP_CAP(PHM_PlatformCaps_StablePState)) { + stable_pstate_sclk_dpm_percentage = + data->registry_data.stable_pstate_sclk_dpm_percentage; PP_ASSERT_WITH_CODE( data->registry_data.stable_pstate_sclk_dpm_percentage >= 1 && data->registry_data.stable_pstate_sclk_dpm_percentage <= 100, @@ -4225,7 +4240,7 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) vega10_fan_ctrl_stop_smc_fan_control(hwmgr); break; case AMD_FAN_CTRL_AUTO: - if (!vega10_fan_ctrl_set_static_mode(hwmgr, mode)) + if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega10_fan_ctrl_start_smc_fan_control(hwmgr); break; default: diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index 1feefac49ea9..dc3761bcb9b6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -365,8 +365,8 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) temp = cgs_read_register(hwmgr->device, reg); - temp = (temp & CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK) >> - CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT; + temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; temp = temp & 0x1ff; diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index d06ece4ac47d..247c97397a27 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h @@ -131,7 +131,8 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_RunAcgInOpenLoop 0x5E #define PPSMC_MSG_InitializeAcg 0x5F #define PPSMC_MSG_GetCurrPkgPwr 0x61 -#define PPSMC_Message_Count 0x62 +#define PPSMC_MSG_UpdatePkgPwrPidAlpha 0x68 +#define PPSMC_Message_Count 0x69 typedef int PPSMC_Msg; diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 0ef9011a1856..02a50929af67 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, { u8 data; int ret = 0; + int retry; if (!mode) { DRM_ERROR("NULL input\n"); @@ -417,10 +418,19 @@ } /* Read Status: i2c over aux */ - ret = drm_dp_dual_mode_read(adapter,
DP_DUAL_MODE_LSPCON_CURRENT_MODE, - &data, sizeof(data)); + for (retry = 0; retry < 6; retry++) { + if (retry) + usleep_range(500, 1000); + + ret = drm_dp_dual_mode_read(adapter, + DP_DUAL_MODE_LSPCON_CURRENT_MODE, + &data, sizeof(data)); + if (!ret) + break; + } + if (ret < 0) { - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n"); + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n"); return -EFAULT; } diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 2affe53f3fda..279c1035c12d 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -681,6 +681,7 @@ EXPORT_SYMBOL(drm_framebuffer_init); /** * drm_framebuffer_lookup - look up a drm framebuffer and grab a reference * @dev: drm device + * @file_priv: drm file to check for lease against. * @id: id of the fb object * * If successful, this grabs an additional reference to the framebuffer - diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index 7c8b2698c6a7..ce4d2fb32810 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -151,6 +151,7 @@ struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, /** * drm_mode_object_find - look up a drm object with static lifetime + * @dev: drm device * @file_priv: drm file * @id: id of the mode object * @type: type of the mode object diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index e123497da0ca..963e23db0fe7 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -93,7 +93,7 @@ void drm_modeset_lock_all(struct drm_device *dev) struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); if (WARN_ON(!ctx)) return; diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 57cc6e37c810..09c1c4ff93ca 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -299,8 +299,8 @@ u32 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc) u32 vblank; unsigned long flags; - WARN(!dev->driver->get_vblank_timestamp, - "This function requires support for accurate vblank timestamps."); + WARN_ONCE(drm_debug & DRM_UT_VBL && !dev->driver->get_vblank_timestamp, + "This function requires support for accurate vblank timestamps."); spin_lock_irqsave(&dev->vblank_time_lock, flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index e651a58c18cf..82b72425a42f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -168,11 +168,13 @@ static struct drm_driver exynos_drm_driver = { static int exynos_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; + drm_kms_helper_poll_disable(drm_dev); exynos_drm_fbdev_suspend(drm_dev); private->suspend_state = drm_atomic_helper_suspend(drm_dev); @@ -188,11 +190,12 @@ static int exynos_drm_suspend(struct device *dev) static int exynos_drm_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; drm_atomic_helper_resume(drm_dev, 
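The loop added to drm_lspcon_get_mode() above tolerates LSPCON adapters that NAK the first i2c-over-aux reads shortly after power-on. The same bounded-retry shape as a free-standing sketch; the helper name is hypothetical, while the six attempts and the 500-1000 us delay mirror the hunk:

static int lspcon_read_retry(struct i2c_adapter *adapter, u8 offset,
			     void *buf, size_t len)
{
	int ret = -EIO;
	int retry;

	for (retry = 0; retry < 6; retry++) {
		if (retry)
			usleep_range(500, 1000);	/* let the adapter settle */

		ret = drm_dp_dual_mode_read(adapter, offset, buf, len);
		if (!ret)
			break;		/* 0 means the read succeeded */
	}

	return ret;
}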
private->suspend_state); exynos_drm_fbdev_resume(drm_dev); drm_kms_helper_poll_enable(drm_dev); @@ -427,6 +430,7 @@ static void exynos_drm_unbind(struct device *dev) kfree(drm->dev_private); drm->dev_private = NULL; + dev_set_drvdata(dev, NULL); drm_dev_unref(drm); } diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 9823477b1855..2269be91f3e1 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -534,9 +534,12 @@ static void ade_crtc_atomic_begin(struct drm_crtc *crtc, { struct ade_crtc *acrtc = to_ade_crtc(crtc); struct ade_hw_ctx *ctx = acrtc->ctx; + struct drm_display_mode *mode = &crtc->state->mode; + struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode; if (!ctx->power_on) (void)ade_power_up(ctx); + ade_ldi_set_mode(acrtc, mode, adj_mode); } static void ade_crtc_atomic_flush(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 66d23b619db1..6c3b0481ef82 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -64,7 +64,7 @@ i915-y += intel_uc.o \ intel_guc.o \ intel_guc_ct.o \ intel_guc_log.o \ - intel_guc_loader.o \ + intel_guc_fw.o \ intel_huc.o \ i915_guc_submission.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2c0ccbb817dc..701a3c6f1669 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2734,6 +2734,9 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; + if (!wa_ctx->per_ctx.valid) + return 0; + per_ctx_start[0] = 0x18800001; per_ctx_start[1] = wa_ctx->per_ctx.guest_gma; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5ec07ecf33ad..4427be18e4a9 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -734,8 +734,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - - WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); + workload->wa_ctx.per_ctx.valid = per_ctx & 1; } if (emulate_schedule_in) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2294466dd415..a5bed2e71b92 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1429,18 +1429,7 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, - unsigned int offset, void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); - return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); -} - -static int instdone_mmio_read(struct intel_vgpu *vgpu, +static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -1589,6 +1578,8 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ + if (HAS_BSD2(dev_priv)) \ + 
MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) #define MMIO_RING_D(prefix, d) \ @@ -1635,10 +1626,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x6c) - MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); + MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG - MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); + MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); @@ -1648,7 +1638,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ @@ -1662,9 +1652,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, @@ -2411,9 +2401,6 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, - intel_vgpu_reg_imr_handler); - MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IER(0), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler); MMIO_DH(GEN8_GT_IIR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler); @@ -2476,68 +2463,34 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); - - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + mmio_read_from_hw, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, 
~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, - ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, - ring_reset_ctl_write); #undef RING_REG #define RING_REG(base) (base + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write); #undef RING_REG #define RING_REG(base) (base + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, - ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 48, F_RO, 0, ~0, D_BDW_PLUS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); @@ -2557,11 +2510,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2849,7 +2800,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL | D_KBL); MMIO_D(0x320f0, D_SKL | D_KBL); - MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL_PLUS); MMIO_D(0x71034, D_SKL_PLUS); MMIO_D(0x72034, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index fbd023a16f18..7d01c77a0f7a 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -54,9 +54,6 @@ #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) -#define _REG_VECS_EXCC 0x1A028 -#define _REG_VCS2_EXCC 0x1c028 - #define _REG_701C0(pipe, plane) (0x701c0 + pipe * 0x1000 + (plane - 1) * 0x100) #define _REG_701C4(pipe, plane) (0x701c4 + pipe * 0x1000 + (plane - 1) * 0x100) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 436377da41ba..03532dfc0cd5 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -308,20 +308,8 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) { - struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; - int ring_id; - kfree(vgpu->sched_data); vgpu->sched_data = NULL; - - spin_lock_bh(&scheduler->mmio_context_lock); - for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { - if (scheduler->engine_owner[ring_id] == vgpu) { - intel_gvt_switch_mmio(vgpu, NULL, ring_id); - scheduler->engine_owner[ring_id] = NULL; - } - } - spin_unlock_bh(&scheduler->mmio_context_lock); } static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) @@ -388,6 +376,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) { struct 
intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; + int ring_id; gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id); @@ -401,4 +390,13 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) scheduler->need_reschedule = true; scheduler->current_vgpu = NULL; } + + spin_lock_bh(&scheduler->mmio_context_lock); + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + if (scheduler->engine_owner[ring_id] == vgpu) { + intel_gvt_switch_mmio(vgpu, NULL, ring_id); + scheduler->engine_owner[ring_id] = NULL; + } + } + spin_unlock_bh(&scheduler->mmio_context_lock); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index f36b85fd6d01..2d694f6c0907 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -68,6 +68,7 @@ struct shadow_indirect_ctx { struct shadow_per_ctx { unsigned long guest_gma; unsigned long shadow_gma; + unsigned valid; }; struct intel_shadow_wa_ctx { diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0bb6e01121fc..c65e381b85f3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -83,7 +83,7 @@ static char get_active_flag(struct drm_i915_gem_object *obj) static char get_pin_flag(struct drm_i915_gem_object *obj) { - return obj->pin_display ? 'p' : ' '; + return obj->pin_global ? 'p' : ' '; } static char get_tiling_flag(struct drm_i915_gem_object *obj) @@ -180,8 +180,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); - if (obj->pin_display) - seq_printf(m, " (display)"); + if (obj->pin_global) + seq_printf(m, " (global)"); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -271,7 +271,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) goto out; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (count == total) break; @@ -283,7 +285,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) total_gtt_size += i915_gem_obj_total_ggtt_size(obj); } - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (count == total) break; @@ -293,6 +295,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) objects[count++] = obj; total_obj_size += obj->base.size; } + spin_unlock(&dev_priv->mm.obj_lock); sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL); @@ -454,7 +457,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; huge_size = huge_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { size += obj->base.size; ++count; @@ -477,11 +482,11 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); size = count = dpy_size = dpy_count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { size += obj->base.size; ++count; - if (obj->pin_display) { + if (obj->pin_global) { dpy_size += obj->base.size; 
++dpy_count; } @@ -502,6 +507,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) page_sizes |= obj->mm.page_sizes.sg; } } + spin_unlock(&dev_priv->mm.obj_lock); + seq_printf(m, "%u bound objects, %llu bytes\n", count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", @@ -512,7 +519,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) huge_count, stringify_page_sizes(page_sizes, buf, sizeof(buf)), huge_size); - seq_printf(m, "%u display objects (pinned), %llu bytes\n", + seq_printf(m, "%u display objects (globally pinned), %llu bytes\n", dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", @@ -568,32 +575,46 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_i915_private *dev_priv = node_to_i915(node); struct drm_device *dev = &dev_priv->drm; - bool show_pin_display_only = !!node->info_ent->data; + struct drm_i915_gem_object **objects; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; + unsigned long nobject, n; int count, ret; + nobject = READ_ONCE(dev_priv->mm.object_count); + objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); + if (!objects) + return -ENOMEM; + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (show_pin_display_only && !obj->pin_display) - continue; + count = 0; + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { + objects[count++] = obj; + if (count == nobject) + break; + } + spin_unlock(&dev_priv->mm.obj_lock); + + total_obj_size = total_gtt_size = 0; + for (n = 0; n < count; n++) { + obj = objects[n]; seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_total_ggtt_size(obj); - count++; } mutex_unlock(&dev->struct_mutex); seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", count, total_obj_size, total_gtt_size); + kvfree(objects); return 0; } @@ -643,54 +664,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } -static void print_request(struct seq_file *m, - struct drm_i915_gem_request *rq, - const char *prefix) -{ - seq_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, - rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, - rq->priotree.priority, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); -} - -static int i915_gem_request_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_i915_gem_request *req; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret, any; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - any = 0; - for_each_engine(engine, dev_priv, id) { - int count; - - count = 0; - list_for_each_entry(req, &engine->timeline->requests, link) - count++; - if (count == 0) - continue; - - seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->timeline->requests, link) - print_request(m, req, " "); - - any++; - } - mutex_unlock(&dev->struct_mutex); - - if (any == 0) - seq_puts(m, "No requests\n"); - - return 0; -} - static void i915_ring_seqno_info(struct seq_file *m, struct intel_engine_cs *engine) { @@ -2386,27 +2359,13 @@ static int i915_llc(struct seq_file *m, void *data) static int 
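The i915_gem_gtt_info() rewrite above illustrates the new locking split: the bound/unbound object lists may now only be walked under the mm.obj_lock spinlock, so the function snapshots object pointers first and does the sleepable seq_file output afterwards. The pattern in isolation (names as in the hunk; struct_mutex, taken by the caller, is what keeps the snapshotted objects alive):

/* 1. size the snapshot locklessly; the copy loop re-checks the bound */
nobject = READ_ONCE(dev_priv->mm.object_count);
objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL);
if (!objects)
	return -ENOMEM;

/* 2. copy pointers under the spinlock; no sleeping allowed here */
count = 0;
spin_lock(&dev_priv->mm.obj_lock);
list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
	objects[count++] = obj;
	if (count == nobject)
		break;
}
spin_unlock(&dev_priv->mm.obj_lock);

/* 3. sleepable work (seq_printf() and friends) outside the lock */
for (n = 0; n < count; n++)
	describe_obj(m, objects[n]);

kvfree(objects);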
i915_huc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_printer p; if (!HAS_HUC_UCODE(dev_priv)) return 0; - seq_puts(m, "HuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", huc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(huc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(huc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - huc_fw->major_ver_found, huc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - huc_fw->header_offset, huc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - huc_fw->ucode_offset, huc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - huc_fw->rsa_offset, huc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->huc.fw, &p); intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); @@ -2418,29 +2377,14 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_printer p; u32 tmp, i; if (!HAS_GUC_UCODE(dev_priv)) return 0; - seq_printf(m, "GuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", - guc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(guc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->major_ver_found, guc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - guc_fw->header_offset, guc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - guc_fw->ucode_offset, guc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - guc_fw->rsa_offset, guc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->guc.fw, &p); intel_runtime_pm_get(dev_priv); @@ -3310,6 +3254,16 @@ static int i915_engine_info(struct seq_file *m, void *unused) return 0; } +static int i915_shrinker_info(struct seq_file *m, void *unused) +{ + struct drm_i915_private *i915 = node_to_i915(m->private); + + seq_printf(m, "seeks = %d\n", i915->mm.shrinker.seeks); + seq_printf(m, "batch = %lu\n", i915->mm.shrinker.batch); + + return 0; +} + static int i915_semaphore_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4225,18 +4179,20 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, i915_ring_test_irq_get, i915_ring_test_irq_set, "0x%08llx\n"); -#define DROP_UNBOUND 0x1 -#define DROP_BOUND 0x2 -#define DROP_RETIRE 0x4 -#define DROP_ACTIVE 0x8 -#define DROP_FREED 0x10 -#define DROP_SHRINK_ALL 0x20 +#define DROP_UNBOUND BIT(0) +#define DROP_BOUND BIT(1) +#define DROP_RETIRE BIT(2) +#define DROP_ACTIVE BIT(3) +#define DROP_FREED BIT(4) +#define DROP_SHRINK_ALL BIT(5) +#define DROP_IDLE BIT(6) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ DROP_ACTIVE | \ DROP_FREED | \ - DROP_SHRINK_ALL) + DROP_SHRINK_ALL |\ + DROP_IDLE) static int i915_drop_caches_get(void *data, u64 *val) { @@ -4252,7 
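Both firmware status dumps above collapse a dozen seq_printf() calls into intel_uc_fw_dump() plus a drm_printer, which abstracts the output sink so one dump routine can target debugfs, dmesg, or any other drm_printf() back end. A minimal usage sketch:

#include <drm/drm_print.h>

static int some_status_info(struct seq_file *m, void *data)
{
	struct drm_printer p = drm_seq_file_printer(m);

	/* the same dump helper could be handed drm_info_printer(dev) */
	drm_printf(&p, "fetch: %s\n", "SUCCESS");

	return 0;
}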
+4208,8 @@ i915_drop_caches_set(void *data, u64 val) struct drm_device *dev = &dev_priv->drm; int ret = 0; - DRM_DEBUG("Dropping caches: 0x%08llx\n", val); + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); /* No need to check and wait for gpu resets, only libdrm auto-restarts * on ioctls on -EAGAIN. */ @@ -4283,6 +4240,9 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(dev_priv); fs_reclaim_release(GFP_KERNEL); + if (val & DROP_IDLE) + drain_delayed_work(&dev_priv->gt.idle_work); + if (val & DROP_FREED) { synchronize_rcu(); i915_gem_drain_freed_objects(dev_priv); @@ -4751,9 +4711,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, - {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_request", i915_gem_request_info, 0}, {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, @@ -4791,6 +4749,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, + {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_semaphore_status", i915_semaphore_status, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c7b2ca6aff05..54b5d4c582b6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171012" -#define DRIVER_TIMESTAMP 1507831511 +#define DRIVER_DATE "20171023" +#define DRIVER_TIMESTAMP 1508748913 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -785,7 +785,6 @@ struct intel_csr { func(has_logical_ring_contexts); \ func(has_logical_ring_preemption); \ func(has_overlay); \ - func(has_pipe_cxsr); \ func(has_pooled_eu); \ func(has_psr); \ func(has_rc6); \ @@ -1108,6 +1107,16 @@ struct intel_fbc { int src_w; int src_h; bool visible; + /* + * Display surface base address adjustment for + * pageflips. Note that on gen4+ this only adjusts up + * to a tile; offsets within a tile are handled in + * the hw itself (with the TILEOFF register). + */ + int adjusted_x; + int adjusted_y; + + int y; } plane; struct { @@ -1490,6 +1499,9 @@ struct i915_gem_mm { * always the inner lock when overlapping with struct_mutex. */ struct mutex stolen_lock; + /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ + spinlock_t obj_lock; + /** List of all objects in gtt_space.
Used to restore gtt * mappings on resume */ struct list_head bound_list; @@ -1510,6 +1522,7 @@ struct i915_gem_mm { */ struct llist_head free_list; struct work_struct free_work; + spinlock_t free_lock; /** * Small stash of WC pages @@ -1765,6 +1778,8 @@ struct intel_vbt_data { u16 panel_id; struct mipi_config *config; struct mipi_pps_data *pps; + u16 bl_ports; + u16 cabc_ports; u8 seq_version; u32 size; u8 *data; @@ -1960,13 +1975,7 @@ struct i915_wa_reg { u32 mask; }; -/* - * RING_MAX_NONPRIV_SLOTS is per-engine but at this point we are only - * allowing it for RCS as we don't foresee any requirement of having - * a whitelist for other engines. When it is really required for - * other engines then the limit need to be increased. - */ -#define I915_MAX_WA_REGS (16 + RING_MAX_NONPRIV_SLOTS) +#define I915_MAX_WA_REGS 16 struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; @@ -3077,6 +3086,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define CNL_REVID_A0 0x0 #define CNL_REVID_B0 0x1 +#define CNL_REVID_C0 0x2 #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) @@ -3168,7 +3178,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) @@ -3565,10 +3574,16 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) return __i915_gem_object_get_pages(obj); } +static inline bool +i915_gem_object_has_pages(struct drm_i915_gem_object *obj) +{ + return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages)); +} + static inline void __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); atomic_inc(&obj->mm.pages_pin_count); } @@ -3582,8 +3597,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(!obj->mm.pages); atomic_dec(&obj->mm.pages_pin_count); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 20fcac37c85a..94b23fcbc989 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -56,7 +56,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_display; + return obj->pin_global; /* currently in use by HW, keep flushed */ } static int @@ -1240,7 +1240,23 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_runtime_pm_get(i915); + if (i915_gem_object_has_struct_page(obj)) { + /* + * Avoid waking the device up if we can fallback, as + * waking/resuming is very slow (worst-case 10-100 ms + * depending on PCI sleeps and our own resume time). + * This easily dwarfs any performance advantage from + * using the cache bypass of indirect GGTT access. 
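A minimal sketch of the wakeref policy the pwrite fast path adopts above, using only the helpers named in the hunk (the wrapper function itself is illustrative, not the real call site): take the runtime-PM wakeref opportunistically when a CPU fallback exists, and pay the full wakeup cost only when GGTT access is the sole option.

/* Illustrative sketch of the wakeref policy in i915_gem_gtt_pwrite_fast() */
static int ggtt_write_get_wakeref(struct drm_i915_private *i915,
				  struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_has_struct_page(obj)) {
		/* A shmem fallback exists: don't wake the device for it */
		if (!intel_runtime_pm_get_if_in_use(i915))
			return -EFAULT;	/* caller falls back to the CPU path */
	} else {
		/* No backing struct pages: the GGTT is the only way in */
		intel_runtime_pm_get(i915);
	}
	return 0;	/* balanced by intel_runtime_pm_put() on completion */
}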
+ */ + if (!intel_runtime_pm_get_if_in_use(i915)) { + ret = -EFAULT; + goto out_unlock; + } + } else { + /* No backing pages, no fallback, we must force GGTT access */ + intel_runtime_pm_get(i915); + } + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONFAULT | @@ -1257,7 +1273,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!node.allocated); } @@ -1320,8 +1336,9 @@ out_unpin: } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(i915); +out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1537,6 +1554,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct list_head *list; struct i915_vma *vma; + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!i915_vma_is_ggtt(vma)) break; @@ -1551,8 +1570,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) } i915 = to_i915(obj->base.dev); + spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; - list_move_tail(&obj->global_link, list); + list_move_tail(&obj->mm.link, list); + spin_unlock(&i915->mm.obj_lock); } /** @@ -2196,7 +2217,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) struct address_space *mapping; lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); switch (obj->mm.madv) { case I915_MADV_DONTNEED: @@ -2253,13 +2274,14 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; if (i915_gem_object_has_pinned_pages(obj)) return; GEM_BUG_ON(obj->bind_count); - if (!READ_ONCE(obj->mm.pages)) + if (!i915_gem_object_has_pages(obj)) return; /* May be called by shrinker from within get_pages() (on another bo) */ @@ -2273,6 +2295,10 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, pages = fetch_and_zero(&obj->mm.pages); GEM_BUG_ON(!pages); + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + if (obj->mm.mapping) { void *ptr; @@ -2507,7 +2533,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; if (i915_gem_object_is_tiled(obj) && - to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; @@ -2529,8 +2555,11 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, if (obj->mm.page_sizes.phys & ~0u << i) obj->mm.page_sizes.sg |= BIT(i); } - GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); + + spin_lock(&i915->mm.obj_lock); + list_add(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) @@ -2563,7 +2592,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); @@ -2648,7 +2677,7 @@ void *i915_gem_object_pin_map(struct 
drm_i915_gem_object *obj, type &= ~I915_MAP_OVERRIDE; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); ret = ____i915_gem_object_get_pages(obj); @@ -2660,7 +2689,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, atomic_inc(&obj->mm.pages_pin_count); pinned = false; } - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { @@ -2715,9 +2744,12 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, * allows it to avoid the cost of retrieving a page (either swapin * or clearing-before-use) before it is overwritten. */ - if (READ_ONCE(obj->mm.pages)) + if (i915_gem_object_has_pages(obj)) return -ENODEV; + if (obj->mm.madv != I915_MADV_WILLNEED) + return -EFAULT; + /* Before the pages are instantiated the object is treated as being * in the CPU domain. The pages will be clflushed as required before * use, and we can freely write into the pages directly. If userspace @@ -3087,7 +3119,6 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { - GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); i915_gem_request_submit(request); @@ -3097,7 +3128,6 @@ static void nop_complete_submit_request(struct drm_i915_gem_request *request) { unsigned long flags; - GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); @@ -3495,7 +3525,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_display)) + if (!READ_ONCE(obj->pin_global)) return; mutex_lock(&obj->base.dev->struct_mutex); @@ -3862,10 +3892,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - /* Mark the pin_display early so that we account for the + /* Mark the global pin early so that we account for the * display coherency whilst setting up the cache domains. */ - obj->pin_display++; + obj->pin_global++; /* The display engine is not coherent with the LLC cache on gen6. 
As * a result, we make sure that the pinning that is about to occur is @@ -3881,7 +3911,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, I915_CACHE_WT : I915_CACHE_NONE); if (ret) { vma = ERR_PTR(ret); - goto err_unpin_display; + goto err_unpin_global; } /* As the user may map the buffer once pinned in the display plane @@ -3912,7 +3942,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); } if (IS_ERR(vma)) - goto err_unpin_display; + goto err_unpin_global; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); @@ -3927,8 +3957,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return vma; -err_unpin_display: - obj->pin_display--; +err_unpin_global: + obj->pin_global--; return vma; } @@ -3937,10 +3967,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(vma->obj->pin_display == 0)) + if (WARN_ON(vma->obj->pin_global == 0)) return; - if (--vma->obj->pin_display == 0) + if (--vma->obj->pin_global == 0) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ @@ -4280,7 +4310,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (err) goto out; - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->mm.madv == I915_MADV_WILLNEED) { @@ -4299,7 +4329,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.madv = args->madv; /* if the object is no longer attached, discard its backing storage */ - if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages) + if (obj->mm.madv == I915_MADV_DONTNEED && + !i915_gem_object_has_pages(obj)) i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; @@ -4325,7 +4356,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4480,13 +4510,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, { struct drm_i915_gem_object *obj, *on; - mutex_lock(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - llist_for_each_entry(obj, freed, freed) { + llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_vma *vma, *vn; trace_i915_gem_object_destroy(obj); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { @@ -4497,14 +4528,20 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); - list_del(&obj->global_link); - } - intel_runtime_pm_put(i915); - mutex_unlock(&i915->drm.struct_mutex); + /* This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. 
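The comment just above is the crux of the new locking scheme: membership on i915->mm.{bound,unbound}_list is now governed by the obj_lock spinlock rather than struct_mutex, so the shrinker can keep reaping pages from objects whose freeing is still deferred behind RCU and the workqueue. Reduced to its essentials (identifiers as used in the hunks; the wrapper name is hypothetical, in the patch these lines live inline in __i915_gem_free_objects()):

/* An object stays visible to the shrinker until its pages are gone */
static void drop_object_link(struct drm_i915_private *i915,
			     struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_has_pages(obj)) {
		spin_lock(&i915->mm.obj_lock);
		list_del_init(&obj->mm.link);	/* invisible to the shrinker */
		spin_unlock(&i915->mm.obj_lock);
	}
}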
+ */ + if (i915_gem_object_has_pages(obj)) { + spin_lock(&i915->mm.obj_lock); + list_del_init(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } - cond_resched(); + mutex_unlock(&i915->drm.struct_mutex); - llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); @@ -4516,7 +4553,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) atomic_set(&obj->mm.pages_pin_count, 0); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); @@ -4527,16 +4564,29 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, kfree(obj->bit_17); i915_gem_object_free(obj); + + if (on) + cond_resched(); } + intel_runtime_pm_put(i915); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) { struct llist_node *freed; - freed = llist_del_all(&i915->mm.free_list); - if (unlikely(freed)) + /* Free the oldest, most stale object to keep the free_list short */ + freed = NULL; + if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ + /* Only one consumer of llist_del_first() allowed */ + spin_lock(&i915->mm.free_lock); + freed = llist_del_first(&i915->mm.free_list); + spin_unlock(&i915->mm.free_lock); + } + if (unlikely(freed)) { + freed->next = NULL; __i915_gem_free_objects(i915, freed); + } } static void __i915_gem_free_work(struct work_struct *work) @@ -4553,11 +4603,17 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ + spin_lock(&i915->mm.free_lock); while ((freed = llist_del_all(&i915->mm.free_list))) { + spin_unlock(&i915->mm.free_lock); + __i915_gem_free_objects(i915, freed); if (need_resched()) - break; + return; + + spin_lock(&i915->mm.free_lock); } + spin_unlock(&i915->mm.free_lock); } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4837,6 +4893,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) init_unused_rings(dev_priv); BUG_ON(!dev_priv->kernel_context); + if (i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = -EIO; + goto out; + } ret = i915_ppgtt_init_hw(dev_priv); if (ret) { @@ -4935,8 +4995,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * wedged. But we only want to do this where the GPU is angry, * for any other failure, such as an allocation failure, bail.
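Two llist consumer patterns appear in the free-path hunks above, condensed here as a sketch. The flush path pops a single stale entry; since llist_del_first() permits only one concurrent consumer, it must run under free_lock. The worker drains whole batches, but drops the lock around the actual freeing so that producers (llist_add() callers) are never blocked:

/* Single-entry pop: one consumer at a time for llist_del_first() */
spin_lock(&i915->mm.free_lock);
freed = llist_del_first(&i915->mm.free_list);
spin_unlock(&i915->mm.free_lock);
if (freed) {
	freed->next = NULL;	/* detach from the remainder of the list */
	__i915_gem_free_objects(i915, freed);
}

/* Batch drain: free outside the lock, reacquire to grab the next batch */
spin_lock(&i915->mm.free_lock);
while ((freed = llist_del_all(&i915->mm.free_list))) {
	spin_unlock(&i915->mm.free_lock);
	__i915_gem_free_objects(i915, freed);
	spin_lock(&i915->mm.free_lock);
}
spin_unlock(&i915->mm.free_lock);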
*/ - DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); - i915_gem_set_wedged(dev_priv); + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); + i915_gem_set_wedged(dev_priv); + } ret = 0; } @@ -5036,11 +5098,15 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) goto err_priorities; INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); + + spin_lock_init(&dev_priv->mm.obj_lock); + spin_lock_init(&dev_priv->mm.free_lock); init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_LIST_HEAD(&dev_priv->mm.userfault_list); + INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -5134,12 +5200,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + spin_lock(&dev_priv->mm.obj_lock); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) + list_for_each_entry(obj, *p, mm.link) __start_cpu_write(obj); } - mutex_unlock(&dev_priv->drm.struct_mutex); + spin_unlock(&dev_priv->mm.obj_lock); return 0; } @@ -5458,7 +5524,17 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - pages = obj->mm.pages; + pages = fetch_and_zero(&obj->mm.pages); + if (pages) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + __i915_gem_object_reset_page_iter(obj); + + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } + obj->ops = &i915_gem_phys_ops; err = ____i915_gem_object_get_pages(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index 8a04d33055be..f663cd919795 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -70,6 +70,7 @@ static const struct dma_fence_ops i915_clflush_ops = { static void __i915_do_clflush(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); intel_fb_obj_flush(obj, ORIGIN_CPU); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5bf96a258509..e304dcbc6042 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -106,14 +106,9 @@ static void lut_close(struct i915_gem_context *ctx) radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); - struct drm_i915_gem_object *obj = vma->obj; radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); + __i915_gem_object_release_unless_active(vma->obj); } } @@ -198,6 +193,11 @@ static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). 
+ */ lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a5a5b7e6daae..8daa8a78cdc0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,21 +33,24 @@ #include "intel_drv.h" #include "i915_trace.h" -static bool ggtt_is_idle(struct drm_i915_private *dev_priv) +I915_SELFTEST_DECLARE(static struct igt_evict_ctl { + bool fail_if_busy:1; +} igt_evict_ctl;) + +static bool ggtt_is_idle(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_engine_cs *engine; + enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { - struct intel_timeline *tl; + if (i915->gt.active_requests) + return false; - tl = &ggtt->base.timeline.engine[engine->id]; - if (i915_gem_active_isset(&tl->last_request)) - return false; - } + for_each_engine(engine, i915, id) { + if (engine->last_retired_context != i915->kernel_context) + return false; + } - return true; + return true; } static int ggtt_flush(struct drm_i915_private *i915) @@ -157,7 +160,8 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, cache_level, start, end, mode); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. @@ -182,7 +186,8 @@ search_again: BUG_ON(ret); } - /* Can we unpin some objects such as idle hw contents, + /* + * Can we unpin some objects such as idle hw contents, * or pending flips? But since only the GGTT has global entries * such as scanouts, ringbuffers and contexts, we can skip the * purge when inspecting per-process local address spaces. @@ -190,19 +195,36 @@ search_again: if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - if (ggtt_is_idle(dev_priv)) { - /* If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; - } + /* + * Not everything in the GGTT is tracked via VMA using + * i915_vma_move_to_active(), otherwise we could evict as required + * with minimal stalling. Instead we are forced to idle the GPU and + * explicitly retire outstanding requests which will then remove + * the pinning for active objects such as contexts and rings, + * enabling us to evict them on the next iteration. + * + * To ensure that all user contexts are evictable, we perform + * a switch to the perma-pinned kernel context. This also gives + * us a termination condition: when the last retired context is + * the kernel's, there is nothing more we can evict. + */ + if (!ggtt_is_idle(dev_priv)) { + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; + + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + goto search_again; + } - goto search_again; + /* + * If we still have pending pageflip completions, drop + * back to userspace to give our workqueues time to + * acquire our locks and unpin the old scanouts. + */ + return intel_has_pending_fb_unpin(dev_priv) ?
-EAGAIN : -ENOSPC; found: /* drm_mm doesn't allow any other operations while diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index daba55a4fce2..5eaa6893daaa 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1341,7 +1341,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, if (IS_ERR(pt)) goto unwind; - if (count < GEN8_PTES) + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) gen8_initialize_pt(vm, pt); gen8_ppgtt_set_pde(vm, pd, pt, pde); @@ -3594,8 +3594,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - list_for_each_entry_safe(obj, on, - &dev_priv->mm.bound_list, global_link) { + list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) { bool ggtt_bound = false; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index d67f1cbe842d..63ce38c1cce9 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -114,7 +114,6 @@ struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; - struct list_head global_link; union { struct rcu_head rcu; struct llist_node freed; @@ -161,7 +160,8 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; unsigned int active_count; - unsigned int pin_display; + /** Count of how many global VMA are currently pinned for use by HW */ + unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ @@ -208,6 +208,12 @@ struct drm_i915_gem_object { } get_page; /** + * Element within i915->mm.unbound_list or i915->mm.bound_list, + * locked by i915->mm.obj_lock. + */ + struct list_head link; + + /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 4dd4c2159a92..3703dc91eeda 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -229,7 +229,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) return 0; /* Recreate the page after shrinking */ - if (!so->vma->obj->mm.pages) + if (!i915_gem_object_has_pages(so->vma->obj)) so->batch_offset = -1; ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 74002b2d1b6f..3770e3323fc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -71,25 +71,6 @@ static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) mutex_unlock(&dev_priv->drm.struct_mutex); } -static bool any_vma_pinned(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - /* Only GGTT vma may be permanently pinned, and are always - * at the start of the list. We can stop hunting as soon - * as we see a ppGTT vma.
- */ - if (!i915_vma_is_ggtt(vma)) - break; - - if (i915_vma_is_pinned(vma)) - return true; - } - - return false; -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -97,9 +78,6 @@ static bool swap_available(void) static bool can_release_pages(struct drm_i915_gem_object *obj) { - if (!obj->mm.pages) - return false; - /* Consider only shrinkable objects. */ if (!i915_gem_object_is_shrinkable(obj)) return false; @@ -115,7 +93,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count) return false; - if (any_vma_pinned(obj)) + /* If any vma are "permanently" pinned, it will prevent us from + * reclaiming the obj->mm.pages. We only allow scanout objects to claim + * a permanent pin, along with a few others like the context objects. + * To simplify the scan, and to avoid walking the list of vma under the + * object, we just check the count of its permanent pins. + */ + if (READ_ONCE(obj->pin_global)) return false; /* We can only return physical pages to the system if we can either @@ -129,7 +113,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); - return !READ_ONCE(obj->mm.pages); + return !i915_gem_object_has_pages(obj); } /** @@ -178,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!shrinker_lock(dev_priv, &unlock)) return 0; + /* + * When shrinking the active list, also consider active contexts. + * Active contexts are pinned until they are retired, and so cannot + * be simply unbound to retire and unpin their pages. To shrink + * the contexts, we must wait until the gpu is idle. + * + * We don't care about errors here; if we cannot wait upon the GPU, + * we will free as much as we can and hope to get a second chance. + */ + if (flags & I915_SHRINK_ACTIVE) + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + trace_i915_gem_shrink(dev_priv, target, flags); i915_gem_retire_requests(dev_priv); @@ -217,15 +213,20 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, continue; INIT_LIST_HEAD(&still_in_list); + + /* + * We serialize our access to unreferenced objects through + * the use of the struct_mutex. While the objects are not + * yet freed (due to RCU then a workqueue) we still want + * to be able to shrink their pages, so they remain on + * the unbound/bound list until actually freed.
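The rewritten scan loop that follows is an instance of a common kernel idiom: you cannot sleep under a spinlock, so each candidate is first parked on a private still_in_list (guaranteeing forward progress), the lock is dropped around the blocking work, then reacquired to fetch the next candidate. A skeleton of the shape, with the count-versus-target termination condition elided:

INIT_LIST_HEAD(&still_in_list);

spin_lock(&i915->mm.obj_lock);
while ((obj = list_first_entry_or_null(phase->list,
				       typeof(*obj), mm.link))) {
	/* Park the object so this pass never revisits it */
	list_move_tail(&obj->mm.link, &still_in_list);

	if (!can_release_pages(obj))
		continue;

	spin_unlock(&i915->mm.obj_lock); /* unsafe_drop_pages() may sleep */
	unsafe_drop_pages(obj);
	spin_lock(&i915->mm.obj_lock);
}
list_splice_tail(&still_in_list, phase->list); /* restore survivors */
spin_unlock(&i915->mm.obj_lock);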
+ */ + spin_lock(&dev_priv->mm.obj_lock); while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), - global_link))) { - list_move_tail(&obj->global_link, &still_in_list); - if (!obj->mm.pages) { - list_del_init(&obj->global_link); - continue; - } + mm.link))) { + list_move_tail(&obj->mm.link, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && obj->mm.madv != I915_MADV_DONTNEED) @@ -243,20 +244,24 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; + spin_unlock(&dev_priv->mm.obj_lock); + if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); - if (!obj->mm.pages) { + if (!i915_gem_object_has_pages(obj)) { __i915_gem_object_invalidate(obj); - list_del_init(&obj->global_link); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); - scanned += obj->base.size >> PAGE_SHIFT; } + scanned += obj->base.size >> PAGE_SHIFT; + + spin_lock(&dev_priv->mm.obj_lock); } list_splice_tail(&still_in_list, phase->list); + spin_unlock(&dev_priv->mm.obj_lock); } if (flags & I915_SHRINK_BOUND) @@ -302,28 +307,39 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); struct drm_i915_gem_object *obj; - unsigned long count; - bool unlock; - - if (!shrinker_lock(dev_priv, &unlock)) - return 0; - - i915_gem_retire_requests(dev_priv); + unsigned long num_objects = 0; + unsigned long count = 0; - count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) - if (can_release_pages(obj)) + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) + if (can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; + num_objects++; + } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; - } + num_objects++; + } + spin_unlock(&i915->mm.obj_lock); - shrinker_unlock(dev_priv, unlock); + /* Update our preferred vmscan batch size for the next pass. + * Our rough guess for an effective batch size is roughly 2 + * available GEM objects' worth of pages. That is, we don't want + * the shrinker to fire until it is worth the cost of freeing an + * entire GEM object. + */ + if (num_objects) { + unsigned long avg = 2 * count / num_objects; + + i915->mm.shrinker.batch = + max((i915->mm.shrinker.batch + avg) >> 1, + 128ul /* default SHRINK_BATCH */); + } return count; } @@ -400,10 +416,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; - bool unlock; - - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) - return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -412,26 +424,20 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) * being pointed to by hardware.
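A worked example of the batch auto-tuning just added to i915_gem_shrinker_count(): the target batch is two average objects' worth of pages, smoothed 50/50 with the previous value and floored at the default SHRINK_BATCH. With 1000 shrinkable objects totalling 2,560,000 reclaimable pages and a previous batch of 4096 (the new initialisation value), the arithmetic runs:

unsigned long count = 2560000, num_objects = 1000, batch = 4096;
unsigned long avg = 2 * count / num_objects;	/* 2 * 2560 = 5120 pages */

batch = max((batch + avg) >> 1,		/* (4096 + 5120) / 2 = 4608 */
	    128ul /* default SHRINK_BATCH */);

So vmscan will subsequently ask this shrinker for work in ~4608-page chunks, i.e. the shrinker does not fire until roughly two whole objects can be freed.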
*/ unbound = bound = unevictable = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { - if (!obj->mm.pages) - continue; - + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!obj->mm.pages) - continue; - + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else bound += obj->base.size >> PAGE_SHIFT; } - - shrinker_unlock(dev_priv, unlock); + spin_unlock(&dev_priv->mm.obj_lock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -498,6 +504,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv) dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; + dev_priv->mm.shrinker.batch = 4096; WARN_ON(register_shrinker(&dev_priv->mm.shrinker)); dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 54fd4cfa9d07..03e7abc7e043 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -724,8 +724,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fb5231f98c0d..1294cf695df0 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -269,7 +269,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * due to the change in swizzling. 
*/ mutex_lock(&obj->mm.lock); - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (tiling == I915_TILING_NONE) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 4d712a4db63b..e26b23171b56 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -82,11 +82,11 @@ static void cancel_userptr(struct work_struct *work) /* We are inside a kthread context and can't be interrupted */ if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - WARN_ONCE(obj->mm.pages, - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", + WARN_ONCE(i915_gem_object_has_pages(obj), + "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n", obj->bind_count, atomic_read(&obj->mm.pages_pin_count), - obj->pin_display); + obj->pin_global); mutex_unlock(&obj->base.dev->struct_mutex); @@ -221,15 +221,17 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm) /* Protected by mm_lock */ mm->mn = fetch_and_zero(&mn); } - } else { - /* someone else raced and successfully installed the mmu - * notifier, we can cancel our own errors */ + } else if (mm->mn) { + /* + * Someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors. + */ err = 0; } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - if (mn) { + if (mn && !IS_ERR(mn)) { destroy_workqueue(mn->wq); kfree(mn); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a2e8114b739d..f84c267728fd 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -610,6 +610,7 @@ done: execlists->first = rb; if (submit) { port_assign(port, last); + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } spin_unlock_irq(&engine->timeline->lock); @@ -633,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data) rq = port_request(&port[0]); } + if (!rq) + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (!port_isset(last_port)) i915_guc_dequeue(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b1296a55c1e4..f8205841868b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1388,8 +1388,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; + if (READ_ONCE(engine->execlists.active)) { + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index bf467f30c99b..6458c309c039 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -193,7 +193,6 @@ static const struct intel_device_info intel_i965gm_info __initconst = { static const struct intel_device_info intel_g45_info __initconst = { GEN4_FEATURES, .platform = INTEL_G45, - .has_pipe_cxsr = 1, .ring_mask = RENDER_RING | BSD_RING, }; @@ -201,7 +200,6 @@ static const struct intel_device_info intel_gm45_info __initconst = { GEN4_FEATURES, .platform = INTEL_GM45, .is_mobile = 1, .has_fbc = 1, - .has_pipe_cxsr = 1, .supports_tv = 1, .ring_mask = 
RENDER_RING | BSD_RING, }; @@ -645,7 +643,7 @@ static void i915_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); i915_driver_unload(dev); - drm_dev_unref(dev); + drm_dev_put(dev); } static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1383a2995a69..59ee808f8fd9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2537,6 +2537,10 @@ static const struct file_operations fops = { .poll = i915_perf_poll, .read = i915_perf_read, .unlocked_ioctl = i915_perf_ioctl, + /* Our ioctls have no arguments, so it's safe to use the same function + * to handle 32-bit compatibility. + */ + .compat_ioctl = i915_perf_ioctl, }; diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58cdbae..195203f298df 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATION BIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d2d0a83c09b6..68a58cce6ab1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5242,7 +5242,7 @@ enum { #define DP_AUX_CH_CTL_TIME_OUT_400us (0 << 26) #define DP_AUX_CH_CTL_TIME_OUT_600us (1 << 26) #define DP_AUX_CH_CTL_TIME_OUT_800us (2 << 26) -#define DP_AUX_CH_CTL_TIME_OUT_1600us (3 << 26) +#define DP_AUX_CH_CTL_TIME_OUT_MAX (3 << 26) /* Varies per platform */ #define DP_AUX_CH_CTL_TIME_OUT_MASK (3 << 26) #define DP_AUX_CH_CTL_RECEIVE_ERROR (1 << 25) #define DP_AUX_CH_CTL_MESSAGE_SIZE_MASK (0x1f << 20) @@ -7041,6 +7041,7 @@ enum { */ #define L3_GENERAL_PRIO_CREDITS(x) (((x) >> 1) << 19) #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) +#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) #define GEN7_L3CNTLREG1 _MMIO(0xB01C) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 808ea4d5b962..e8ca67a129d2 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -41,6 +41,11 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) debug_object_init(fence, &i915_sw_fence_debug_descr); } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ + debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr); +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { debug_object_activate(fence, &i915_sw_fence_debug_descr); @@ -79,6 +84,10 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) { } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { } @@ -360,9 +369,9 @@ struct i915_sw_dma_fence_cb { struct irq_work work; }; -static void timer_i915_sw_fence_wake(unsigned long data) +static void timer_i915_sw_fence_wake(struct timer_list *t) { - struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data; + struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer); struct i915_sw_fence *fence; fence = xchg(&cb->fence, NULL); @@ -425,9 +434,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, i915_sw_fence_await(fence); cb->dma = NULL; - __setup_timer(&cb->timer, - timer_i915_sw_fence_wake,
(unsigned long)cb, - TIMER_IRQSAFE); + timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); init_irq_work(&cb->work, irq_i915_sw_fence_work); if (timeout) { cb->dma = dma_fence_get(dma); @@ -507,5 +514,6 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/lib_sw_fence.c" #include "selftests/i915_sw_fence.c" #endif diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd2956b70..bb8338450dc1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -30,6 +30,12 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +static inline bool +intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4dce2e0197d9..fbfab2f33023 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -54,12 +54,21 @@ i915_vma_retire(struct i915_gem_active *active, if (--obj->active_count) return; + /* Prune the shared fence arrays iff completely idle (inc. external) */ + if (reservation_object_trylock(obj->resv)) { + if (reservation_object_test_signaled_rcu(obj->resv, true)) + reservation_object_add_excl_fence(obj->resv, NULL); + reservation_object_unlock(obj->resv); + } + /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ + spin_lock(&rq->i915->mm.obj_lock); if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); + spin_unlock(&rq->i915->mm.obj_lock); obj->mm.dirty = true; /* be paranoid */ @@ -563,9 +572,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; @@ -580,6 +593,7 @@ err_unpin: static void i915_vma_remove(struct i915_vma *vma) { + struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -593,9 +607,10 @@ i915_vma_remove(struct i915_vma *vma) /* Since the unbound list is global, only move to that list if * no more VMAs exist. 
*/ + spin_lock(&i915->mm.obj_lock); if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); + list_move_tail(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 9d5b42953436..fd23023df7c1 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -691,6 +691,48 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) dev_priv->vbt.psr.tp2_tp3_wakeup_time = psr_table->tp2_tp3_wakeup_time; } +static void parse_dsi_backlight_ports(struct drm_i915_private *dev_priv, + u16 version, enum port port) +{ + if (!dev_priv->vbt.dsi.config->dual_link || version < 197) { + dev_priv->vbt.dsi.bl_ports = BIT(port); + if (dev_priv->vbt.dsi.config->cabc_supported) + dev_priv->vbt.dsi.cabc_ports = BIT(port); + + return; + } + + switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); + break; + } + + if (!dev_priv->vbt.dsi.config->cabc_supported) + return; + + switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.cabc_ports = + BIT(PORT_A) | BIT(PORT_C); + break; + } +} + static void parse_mipi_config(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) @@ -699,9 +741,10 @@ parse_mipi_config(struct drm_i915_private *dev_priv, const struct mipi_config *config; const struct mipi_pps_data *pps; int panel_type = dev_priv->vbt.panel_type; + enum port port; /* parse MIPI blocks only if LFP type is MIPI */ - if (!intel_bios_is_dsi_present(dev_priv, NULL)) + if (!intel_bios_is_dsi_present(dev_priv, &port)) return; /* Initialize this to undefined indicating no generic MIPI support */ @@ -742,15 +785,7 @@ parse_mipi_config(struct drm_i915_private *dev_priv, return; } - /* - * These fields are introduced from the VBT version 197 onwards, - * so making sure that these bits are set zero in the previous - * versions. - */ - if (dev_priv->vbt.dsi.config->dual_link && bdb->version < 197) { - dev_priv->vbt.dsi.config->dl_dcs_cabc_ports = 0; - dev_priv->vbt.dsi.config->dl_dcs_backlight_ports = 0; - } + parse_dsi_backlight_ports(dev_priv, bdb->version, port); /* We have mandatory mipi config blocks. 
Initialize as generic panel */ dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; @@ -1071,6 +1106,22 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, } } +static const u8 cnp_ddc_pin_map[] = { + [DDC_BUS_DDI_B] = GMBUS_PIN_1_BXT, + [DDC_BUS_DDI_C] = GMBUS_PIN_2_BXT, + [DDC_BUS_DDI_D] = GMBUS_PIN_4_CNP, /* sic */ + [DDC_BUS_DDI_F] = GMBUS_PIN_3_BXT, /* sic */ +}; + +static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) +{ + if (HAS_PCH_CNP(dev_priv) && + vbt_pin > 0 && vbt_pin < ARRAY_SIZE(cnp_ddc_pin_map)) + return cnp_ddc_pin_map[vbt_pin]; + + return vbt_pin; +} + static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, u8 bdb_version) { @@ -1131,6 +1182,13 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = false; } + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } + info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1156,16 +1214,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = ddc_pin; - - /* - * All VBTs that we got so far for B Stepping has this - * information wrong for Port D. So, let's just ignore for now. - */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0) && - port == PORT_D) { - info->alternate_ddc_pin = 0; - } + info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); sanitize_ddc_pin(dev_priv, port); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 29c62d481cef..48e1ba01ccf8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -74,9 +74,10 @@ static noinline void missed_breadcrumb(struct intel_engine_cs *engine) set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } -static void intel_breadcrumbs_hangcheck(unsigned long data) +static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = from_timer(engine, t, + breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -108,9 +109,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) } } -static void intel_breadcrumbs_fake_irq(unsigned long data) +static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = from_timer(engine, t, + breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The timer persists in case we cannot enable interrupts, @@ -787,12 +789,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->rb_lock); spin_lock_init(&b->irq_lock); - setup_timer(&b->fake_irq, - intel_breadcrumbs_fake_irq, - (unsigned long)engine); - setup_timer(&b->hangcheck, - intel_breadcrumbs_hangcheck, - (unsigned long)engine); + timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); + timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); /* Spawn a thread to provide a common bottom-half for all signals. 
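The timer conversions above (i915_sw_fence and intel_breadcrumbs) follow the then-new kernel timer API: the callback receives the struct timer_list pointer itself and recovers its container with from_timer(), which is just container_of() keyed on the member name, eliminating the (unsigned long) data cast. A sketch of what the macro buys for the breadcrumbs case, assuming the standard kernel definitions:

/* from_timer(var, t, field) ~= container_of(t, typeof(*var), field) */
struct intel_engine_cs *engine =
	from_timer(engine, t, breadcrumbs.fake_irq);

/* ...is equivalent to writing... */
struct intel_engine_cs *engine =
	container_of(t, struct intel_engine_cs, breadcrumbs.fake_irq);

The timer no longer carries opaque data, so timer_setup() takes only flags; any state the callback needs must be reachable from the structure embedding the timer.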
* As this is an asynchronous interface we cannot steal the current diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 668e8c3e791d..437339f5d098 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -344,10 +344,25 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + return true; +} + +static bool pch_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + pipe_config->has_pch_encoder = true; + + return true; +} + +static bool hsw_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (HAS_PCH_SPLIT(dev_priv)) - pipe_config->has_pch_encoder = true; + pipe_config->has_pch_encoder = true; /* LPT FDI RX only supports 8bpc. */ if (HAS_PCH_LPT(dev_priv)) { @@ -360,8 +375,7 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, } /* FDI must always be 2.7 GHz */ - if (HAS_DDI(dev_priv)) - pipe_config->port_clock = 135000 * 2; + pipe_config->port_clock = 135000 * 2; return true; } @@ -959,11 +973,11 @@ void intel_crt_init(struct drm_i915_private *dev_priv) !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; - crt->base.compute_config = intel_crt_compute_config; if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.compute_config = hsw_crt_compute_config; crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; crt->base.pre_enable = hsw_pre_enable_crt; crt->base.enable = hsw_enable_crt; @@ -971,9 +985,11 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.post_disable = hsw_post_disable_crt; } else { if (HAS_PCH_SPLIT(dev_priv)) { + crt->base.compute_config = pch_crt_compute_config; crt->base.disable = pch_disable_crt; crt->base.post_disable = pch_post_disable_crt; } else { + crt->base.compute_config = intel_crt_compute_config; crt->base.disable = intel_disable_crt; } crt->base.port = PORT_NONE; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index ea5d5c9645a4..da9de47562b8 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -52,10 +52,6 @@ MODULE_FIRMWARE(I915_CSR_SKL); MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" - - - #define CSR_MAX_FW_SIZE 0x2FFF #define CSR_DEFAULT_FW_OFFSET 0xFFFFFFFF @@ -291,7 +287,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, css_header = (struct intel_css_header *)fw->data; if (sizeof(struct intel_css_header) != (css_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong CSS header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong CSS header length " + "(%u bytes)\n", (css_header->header_len * 4)); return NULL; } @@ -315,7 +312,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, if (csr->version != required_version) { DRM_INFO("Refusing to load DMC firmware v%u.%u," - " please use v%u.%u [" FIRMWARE_URL "].\n", + " please use v%u.%u\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version), CSR_VERSION_MAJOR(required_version), @@ -330,7 +327,8 @@ static uint32_t *parse_csr_fw(struct 
drm_i915_private *dev_priv, &fw->data[readcount]; if (sizeof(struct intel_package_header) != (package_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong package header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong package header length " + "(%u bytes)\n", (package_header->header_len * 4)); return NULL; } @@ -351,7 +349,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, dmc_offset = package_header->fw_info[i].offset; } if (dmc_offset == CSR_DEFAULT_FW_OFFSET) { - DRM_ERROR("Firmware not supported for %c stepping\n", + DRM_ERROR("DMC firmware not supported for %c stepping\n", si->stepping); return NULL; } @@ -360,7 +358,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Extract dmc_header information. */ dmc_header = (struct intel_dmc_header *)&fw->data[readcount]; if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) { - DRM_ERROR("Firmware has wrong dmc header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong dmc header length " + "(%u bytes)\n", (dmc_header->header_len)); return NULL; } @@ -368,7 +367,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Cache the dmc header info. */ if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) { - DRM_ERROR("Firmware has wrong mmio count %u\n", + DRM_ERROR("DMC firmware has wrong mmio count %u\n", dmc_header->mmio_count); return NULL; } @@ -376,7 +375,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, for (i = 0; i < dmc_header->mmio_count; i++) { if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE || dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) { - DRM_ERROR(" Firmware has wrong mmio address 0x%x\n", + DRM_ERROR("DMC firmware has wrong mmio address 0x%x\n", dmc_header->mmioaddr[i]); return NULL; } @@ -387,7 +386,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* fw_size is in dwords, so multiplied by 4 to convert into bytes. */ nbytes = dmc_header->fw_size * 4; if (nbytes > CSR_MAX_FW_SIZE) { - DRM_ERROR("CSR firmware too big (%u) bytes\n", nbytes); + DRM_ERROR("DMC firmware too big (%u bytes)\n", nbytes); return NULL; } csr->dmc_fw_size = dmc_header->fw_size; @@ -425,9 +424,11 @@ static void csr_load_work_fn(struct work_struct *work) CSR_VERSION_MINOR(csr->version)); } else { dev_notice(dev_priv->drm.dev, - "Failed to load DMC firmware" - " [" FIRMWARE_URL "]," - " disabling runtime power management.\n"); + "Failed to load DMC firmware %s." 
+ " Disabling runtime power management.\n", + csr->fw_path); + dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s", + INTEL_UC_FIRMWARE_URL); } release_firmware(fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b307b6fe1ce3..933c18fd4258 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -305,35 +305,34 @@ struct bxt_ddi_buf_trans { u8 scale; /* scale value */ u8 enable; /* scale enable */ u8 deemphasis; - bool default_index; /* true if the entry represents default value */ }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, true }, /* 0: 400 0 */ - { 78, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 104, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 154, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 116, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 154, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 154, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, false }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 78, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 104, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 154, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 116, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 154, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 154, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { /* Idx NT mV diff db */ - { 26, 0, 0, 128, false }, /* 0: 200 0 */ - { 38, 0, 0, 112, false }, /* 1: 200 1.5 */ - { 48, 0, 0, 96, false }, /* 2: 200 4 */ - { 54, 0, 0, 69, false }, /* 3: 200 6 */ - { 32, 0, 0, 128, false }, /* 4: 250 0 */ - { 48, 0, 0, 104, false }, /* 5: 250 1.5 */ - { 54, 0, 0, 85, false }, /* 6: 250 4 */ - { 43, 0, 0, 128, false }, /* 7: 300 0 */ - { 54, 0, 0, 101, false }, /* 8: 300 1.5 */ - { 48, 0, 0, 128, false }, /* 9: 300 0 */ + { 26, 0, 0, 128, }, /* 0: 200 0 */ + { 38, 0, 0, 112, }, /* 1: 200 1.5 */ + { 48, 0, 0, 96, }, /* 2: 200 4 */ + { 54, 0, 0, 69, }, /* 3: 200 6 */ + { 32, 0, 0, 128, }, /* 4: 250 0 */ + { 48, 0, 0, 104, }, /* 5: 250 1.5 */ + { 54, 0, 0, 85, }, /* 6: 250 4 */ + { 43, 0, 0, 128, }, /* 7: 300 0 */ + { 54, 0, 0, 101, }, /* 8: 300 1.5 */ + { 48, 0, 0, 128, }, /* 9: 300 0 */ }; /* BSpec has 2 recommended values - entries 0 and 8. 
@@ -341,16 +340,16 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { */ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, false }, /* 0: 400 0 */ - { 52, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 52, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 42, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 77, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 77, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 102, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 52, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 52, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 42, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 77, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 77, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 102, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; struct cnl_ddi_buf_trans { @@ -588,6 +587,120 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int skl_buf_trans_num_entries(enum port port, int n_entries) +{ + /* Only DDIA and DDIE can select the 10th register with DP */ + if (port == PORT_A || port == PORT_E) + return min(n_entries, 10); + else + return min(n_entries, 9); +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + enum port port, int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + kbl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_SKYLAKE(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + enum port port, int *n_entries) +{ + if (IS_GEN9_BC(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_edp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_fdi); + return bdw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_GEN9_BC(dev_priv)) { + return skl_get_buf_trans_hdmi(dev_priv, n_entries); + } 
else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + return bdw_ddi_translations_hdmi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + return hsw_ddi_translations_hdmi; + } + + *n_entries = 0; + return NULL; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); + return bxt_ddi_translations_dp; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) +{ + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); + return bxt_ddi_translations_edp; + } + + return bxt_get_buf_trans_dp(dev_priv, n_entries); +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); + return bxt_ddi_translations_hdmi; +} + static const struct cnl_ddi_buf_trans * cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) { @@ -657,92 +770,40 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { - int n_hdmi_entries; - int hdmi_level; - int hdmi_default_entry; - - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + int n_entries, level, default_entry; - if (IS_GEN9_LP(dev_priv)) - return hdmi_level; + level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = n_hdmi_entries - 1; + cnl_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; + } else if (IS_GEN9_LP(dev_priv)) { + bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; } else if (IS_GEN9_BC(dev_priv)) { - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 7; } else if (IS_HASWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; + return 0; } /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; + if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries) + level = default_entry; - return hdmi_level; -} - -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - return kbl_get_buf_trans_dp(dev_priv, n_entries); - } else if (IS_SKYLAKE(dev_priv)) { - return skl_get_buf_trans_dp(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } - - *n_entries = 0; - return NULL; -} 
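
The helpers being introduced here all share one shape: a single lookup function returns the translation table and reports its length through *n_entries, and every consumer clamps its index (under WARN_ON_ONCE) instead of indexing blindly. This is also where the port-dependent cap now lives: skl_buf_trans_num_entries() encodes that only DDI A and E can select the 10th DP entry. Below is a minimal, self-contained C sketch of that shape, not i915 code; buf_trans, get_buf_trans and program_level are invented names for illustration.

#include <stdio.h>
#include <stddef.h>

struct buf_trans { int vswing_mv; int deemph; };

static const struct buf_trans table_dp[] = {
    { 400, 0 }, { 400, 35 }, { 600, 0 }, { 800, 0 },
};

/* Single source of truth: hand back the table and report its size. */
static const struct buf_trans *get_buf_trans(int *n_entries)
{
    *n_entries = (int)(sizeof(table_dp) / sizeof(table_dp[0]));
    return table_dp;
}

static void program_level(int level)
{
    int n_entries;
    const struct buf_trans *t = get_buf_trans(&n_entries);

    if (!t || n_entries == 0)
        return;                 /* nothing to program */
    if (level >= n_entries)
        level = n_entries - 1;  /* clamp, as the WARN_ON_ONCE sites do */

    printf("vswing=%dmV deemph=%d\n", t[level].vswing_mv, t[level].deemph);
}

int main(void)
{
    program_level(2);   /* in range */
    program_level(9);   /* out of range: clamped to the last entry */
    return 0;
}

Clamping at the consumer is also what lets the bxt tables above drop their default_index flags: an out-of-range level now falls back to the last entry in one place instead of per-table default hunting.
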
- -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_GEN9_BC(dev_priv)) { - return skl_get_buf_trans_edp(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - return bdw_get_buf_trans_edp(dev_priv, n_entries); - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } - - *n_entries = 0; - return NULL; -} - -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } + if (WARN_ON_ONCE(n_entries == 0)) + return 0; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - *n_entries = 0; - return NULL; + return level; } /* @@ -760,11 +821,11 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) switch (encoder->type) { case INTEL_OUTPUT_EDP: - ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); break; case INTEL_OUTPUT_DP: - ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); break; case INTEL_OUTPUT_ANALOG: @@ -776,16 +837,10 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) return; } - if (IS_GEN9_BC(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_entries > 9)) - n_entries = 9; - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), @@ -800,39 +855,32 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) * values in advance. This function programs the correct values for * HDMI/DVI use cases. 
*/ -static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, + int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int n_hdmi_entries, hdmi_level; + int n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_hdmi; + const struct ddi_buf_trans *ddi_translations; - hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); - if (IS_GEN9_BC(dev_priv)) { - ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, 9), - ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); + ddi_translations[level].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, 9), - ddi_translations_hdmi[hdmi_level].trans2); + ddi_translations[level].trans2); } static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -1108,14 +1156,14 @@ static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t dpll) + enum intel_dpll_id pll_id) { i915_reg_t cfgcr1_reg, cfgcr2_reg; uint32_t cfgcr1_val, cfgcr2_val; uint32_t p0, p1, p2, dco_freq; - cfgcr1_reg = DPLL_CFGCR1(dpll); - cfgcr2_reg = DPLL_CFGCR2(dpll); + cfgcr1_reg = DPLL_CFGCR1(pll_id); + cfgcr2_reg = DPLL_CFGCR2(pll_id); cfgcr1_val = I915_READ(cfgcr1_reg); cfgcr2_val = I915_READ(cfgcr2_reg); @@ -1168,7 +1216,7 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id) + enum intel_dpll_id pll_id) { uint32_t cfgcr0, cfgcr1; uint32_t p0, p1, p2, dco_freq, ref_clock; @@ -1255,7 +1303,8 @@ static void cnl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t cfgcr0, pll_id; + uint32_t cfgcr0; + enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1308,17 +1357,18 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t dpll_ctl1, dpll; + uint32_t dpll_ctl1; + enum intel_dpll_id pll_id; - dpll = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); + pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); dpll_ctl1 = I915_READ(DPLL_CTRL1); - if (dpll_ctl1 & 
DPLL_CTRL1_HDMI_MODE(dpll)) { - link_clock = skl_calc_wrpll_link(dev_priv, dpll); + if (dpll_ctl1 & DPLL_CTRL1_HDMI_MODE(pll_id)) { + link_clock = skl_calc_wrpll_link(dev_priv, pll_id); } else { - link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(dpll); - link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(dpll); + link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(pll_id); + link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(pll_id); switch (link_clock) { case DPLL_CTRL1_LINK_RATE_810: @@ -1399,17 +1449,17 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder, } static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, - enum intel_dpll_id dpll) + enum intel_dpll_id pll_id) { struct intel_shared_dpll *pll; struct intel_dpll_hw_state *state; struct dpll clock; /* For DDI ports we always use a shared PLL. */ - if (WARN_ON(dpll == DPLL_ID_PRIVATE)) + if (WARN_ON(pll_id == DPLL_ID_PRIVATE)) return 0; - pll = &dev_priv->shared_dplls[dpll]; + pll = &dev_priv->shared_dplls[pll_id]; state = &pll->state.hw_state; clock.m1 = 2; @@ -1428,9 +1478,9 @@ static void bxt_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); - uint32_t dpll = port; + enum intel_dpll_id pll_id = port; - pipe_config->port_clock = bxt_calc_pll_link(dev_priv, dpll); + pipe_config->port_clock = bxt_calc_pll_link(dev_priv, pll_id); ddi_dotclock_get(pipe_config); } @@ -1782,54 +1832,36 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); } -static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +static void skl_ddi_set_iboost(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->port; - int type = encoder->type; - const struct ddi_buf_trans *ddi_translations; uint8_t iboost; - uint8_t dp_iboost, hdmi_iboost; - int n_entries; - /* VBT may override standard boost values */ - dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; - hdmi_iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + if (type == INTEL_OUTPUT_HDMI) + iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + else + iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; - if (type == INTEL_OUTPUT_DP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) - ddi_translations = kbl_get_buf_trans_dp(dev_priv, - &n_entries); - else - ddi_translations = skl_get_buf_trans_dp(dev_priv, - &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_EDP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - ddi_translations = skl_get_buf_trans_edp(dev_priv, &n_entries); + if (iboost == 0) { + const struct ddi_buf_trans *ddi_translations; + int n_entries; - if (WARN_ON(port != PORT_A && - port != PORT_E && n_entries > 9)) - n_entries = 9; + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); + else + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_HDMI) { - if (hdmi_iboost) { - iboost = hdmi_iboost; - } 
else { - ddi_translations = skl_get_buf_trans_hdmi(dev_priv, &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else { - return; + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; + + iboost = ddi_translations[level].i_boost; } /* Make sure that the requested I_boost is valid */ @@ -1844,38 +1876,25 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct bxt_ddi_buf_trans *ddi_translations; - u32 n_entries, i; - - if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); - ddi_translations = bxt_ddi_translations_edp; - } else if (type == INTEL_OUTPUT_DP - || type == INTEL_OUTPUT_EDP) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); - ddi_translations = bxt_ddi_translations_dp; - } else if (type == INTEL_OUTPUT_HDMI) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); - ddi_translations = bxt_ddi_translations_hdmi; - } else { - DRM_DEBUG_KMS("Vswing programming not done for encoder %d\n", - type); - return; - } + enum port port = encoder->port; + int n_entries; - /* Check if default value has to be used */ - if (level >= n_entries || - (type == INTEL_OUTPUT_HDMI && level == HDMI_LEVEL_SHIFT_UNKNOWN)) { - for (i = 0; i < n_entries; i++) { - if (ddi_translations[i].default_index) { - level = i; - break; - } - } - } + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + ddi_translations = bxt_get_buf_trans_dp(dev_priv, &n_entries); + + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; bxt_ddi_phy_set_signal_level(dev_priv, port, ddi_translations[level].margin, @@ -1887,6 +1906,7 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; int n_entries; if (IS_CANNONLAKE(dev_priv)) { @@ -1894,11 +1914,16 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) cnl_get_buf_trans_edp(dev_priv, &n_entries); else cnl_get_buf_trans_dp(dev_priv, &n_entries); + } else if (IS_GEN9_LP(dev_priv)) { + if (encoder->type == INTEL_OUTPUT_EDP) + bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + bxt_get_buf_trans_dp(dev_priv, &n_entries); } else { if (encoder->type == INTEL_OUTPUT_EDP) - intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); else - intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); + intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); } if (WARN_ON(n_entries < 1)) @@ -1910,28 +1935,26 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) DP_TRAIN_VOLTAGE_SWING_MASK; } -static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void cnl_ddi_vswing_program(struct intel_encoder *encoder, + int level, enum intel_output_type type) { - const struct cnl_ddi_buf_trans 
*ddi_translations = NULL; - u32 n_entries, val; - int ln; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); + const struct cnl_ddi_buf_trans *ddi_translations; + int n_entries, ln; + u32 val; - if (type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) ddi_translations = cnl_get_buf_trans_hdmi(dev_priv, &n_entries); - } else if (type == INTEL_OUTPUT_DP) { - ddi_translations = cnl_get_buf_trans_dp(dev_priv, &n_entries); - } else if (type == INTEL_OUTPUT_EDP) { + else if (type == INTEL_OUTPUT_EDP) ddi_translations = cnl_get_buf_trans_edp(dev_priv, &n_entries); - } + else + ddi_translations = cnl_get_buf_trans_dp(dev_priv, &n_entries); - if (WARN_ON(ddi_translations == NULL)) + if (WARN_ON_ONCE(!ddi_translations)) return; - - if (level >= n_entries) { - DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + if (WARN_ON_ONCE(level >= n_entries)) level = n_entries - 1; - } /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -1976,26 +1999,22 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, I915_WRITE(CNL_PORT_TX_DW7_GRP(port), val); } -static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) +static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = intel_ddi_get_encoder_port(encoder); - int type = encoder->type; - int width = 0; - int rate = 0; + int width, rate, ln; u32 val; - int ln = 0; - if ((intel_dp) && (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP)) { - width = intel_dp->lane_count; - rate = intel_dp->link_rate; - } else if (type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) { width = 4; - /* Rate is always < than 6GHz for HDMI */ + rate = 0; /* Rate is always < than 6GHz for HDMI */ } else { - MISSING_CASE(type); - return; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; } /* @@ -2004,7 +2023,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) * else clear to 0b. */ val = I915_READ(CNL_PORT_PCS_DW1_LN0(port)); - if (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP) + if (type != INTEL_OUTPUT_HDMI) val |= COMMON_KEEPER_EN; else val &= ~COMMON_KEEPER_EN; @@ -2039,7 +2058,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); /* 5. Program swing and de-emphasis */ - cnl_ddi_vswing_program(dev_priv, level, port, type); + cnl_ddi_vswing_program(encoder, level, type); /* 6. 
Set training enable to trigger update */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -2076,13 +2095,12 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); struct intel_encoder *encoder = &dport->base; - enum port port = dport->port; - u32 level = intel_ddi_dp_level(intel_dp); + int level = intel_ddi_dp_level(intel_dp); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + cnl_ddi_vswing_sequence(encoder, level, encoder->type); else - bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + bxt_ddi_vswing_sequence(encoder, level, encoder->type); return 0; } @@ -2092,10 +2110,10 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); struct intel_encoder *encoder = &dport->base; - uint32_t level = intel_ddi_dp_level(intel_dp); + int level = intel_ddi_dp_level(intel_dp); if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); + skl_ddi_set_iboost(encoder, level, encoder->type); return DDI_BUF_TRANS_SELECT(level); } @@ -2122,8 +2140,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, * register writes. */ val = I915_READ(DPCLKA_CFGCR0); - val &= ~(DPCLKA_CFGCR0_DDI_CLK_OFF(port) | - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port)); + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); I915_WRITE(DPCLKA_CFGCR0, val); } else if (IS_GEN9_BC(dev_priv)) { /* DDI -> PLL mapping */ @@ -2141,37 +2158,52 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } } +static void intel_ddi_clk_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); + + if (IS_CANNONLAKE(dev_priv)) + I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + else if (IS_GEN9_BC(dev_priv)) + I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | + DPLL_CTRL2_DDI_CLK_OFF(port)); + else if (INTEL_GEN(dev_priv) < 9) + I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); +} + static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - int link_rate, uint32_t lane_count, - struct intel_shared_dpll *pll, - bool link_mst) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - uint32_t level = intel_ddi_dp_level(intel_dp); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + + WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); - WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); - intel_dp_set_link_params(intel_dp, link_rate, lane_count, - link_mst); - if (encoder->type == INTEL_OUTPUT_EDP) - intel_edp_panel_on(intel_dp); + intel_edp_panel_on(intel_dp); - intel_ddi_clk_select(encoder, pll); + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if 
(IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + bxt_ddi_vswing_sequence(encoder, level, encoder->type); else intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); - if (!link_mst) + if (!is_mst) intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) @@ -2179,10 +2211,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, - bool has_infoframe, const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state, - const struct intel_shared_dpll *pll) + const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; @@ -2192,84 +2222,49 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); - intel_ddi_clk_select(encoder, pll); + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, - INTEL_OUTPUT_HDMI); + bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else - intel_prepare_hdmi_ddi_buffers(encoder); + intel_prepare_hdmi_ddi_buffers(encoder, level); if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); + skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); intel_dig_port->set_infoframes(&encoder->base, - has_infoframe, + crtc_state->has_infoframe, crtc_state, conn_state); } static void intel_ddi_pre_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - int type = encoder->type; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; - WARN_ON(intel_crtc->config->has_pch_encoder); + WARN_ON(crtc_state->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - intel_ddi_pre_enable_dp(encoder, - pipe_config->port_clock, - pipe_config->lane_count, - pipe_config->shared_dpll, - intel_crtc_has_type(pipe_config, - INTEL_OUTPUT_DP_MST)); - } - if (type == INTEL_OUTPUT_HDMI) { - intel_ddi_pre_enable_hdmi(encoder, - pipe_config->has_infoframe, - pipe_config, conn_state, - pipe_config->shared_dpll); - } + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state); + else + intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state); } -static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void intel_disable_ddi_buf(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private 
*dev_priv = to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - int type = intel_encoder->type; - uint32_t val; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); bool wait = false; - - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - /* - * old_crtc_state and old_conn_state are NULL when called from - * DP_MST. The main connector associated with this port is never - * bound to a crtc for MST. - */ - bool is_mst = !old_crtc_state; - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - /* - * Power down sink before disabling the port, otherwise we end - * up getting interrupts from the sink on detecting link loss. - */ - if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - } + u32 val; val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { @@ -2285,36 +2280,75 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, if (wait) intel_wait_ddi_buf_idle(dev_priv, port); +} - if (type == INTEL_OUTPUT_HDMI) { - dig_port->set_infoframes(encoder, false, - old_crtc_state, old_conn_state); - } +static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_dp *intel_dp = &dig_port->dp; + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. + */ + bool is_mst = !old_crtc_state; - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. 
+ */ + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - intel_edp_panel_vdd_on(intel_dp); - intel_edp_panel_off(intel_dp); - } + intel_disable_ddi_buf(encoder); - if (dig_port) - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_edp_panel_vdd_on(intel_dp); + intel_edp_panel_off(intel_dp); - if (IS_CANNONLAKE(dev_priv)) - I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) - I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | - DPLL_CTRL2_DDI_CLK_OFF(port))); - else if (INTEL_GEN(dev_priv) < 9) - I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); - if (type == INTEL_OUTPUT_HDMI) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + intel_ddi_clk_disable(encoder); +} - intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); - } +static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_hdmi *intel_hdmi = &dig_port->hdmi; + + intel_disable_ddi_buf(encoder); + + dig_port->set_infoframes(&encoder->base, false, + old_crtc_state, old_conn_state); + + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + intel_ddi_clk_disable(encoder); + + intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); +} + +static void intel_ddi_post_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. 
+ */ + if (old_crtc_state && + intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_post_disable_hdmi(encoder, + old_crtc_state, old_conn_state); + else + intel_ddi_post_disable_dp(encoder, + old_crtc_state, old_conn_state); } void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, @@ -2334,7 +2368,8 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, val &= ~FDI_RX_ENABLE; I915_WRITE(FDI_RX_CTL(PIPE_A), val); - intel_ddi_post_disable(encoder, old_crtc_state, old_conn_state); + intel_disable_ddi_buf(encoder); + intel_ddi_clk_disable(encoder); val = I915_READ(FDI_RX_MISC(PIPE_A)); val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); @@ -2350,70 +2385,93 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, I915_WRITE(FDI_RX_CTL(PIPE_A), val); } -static void intel_enable_ddi(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void intel_enable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - int type = intel_encoder->type; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + enum port port = intel_ddi_get_encoder_port(encoder); - if (type == INTEL_OUTPUT_HDMI) { - struct intel_digital_port *intel_dig_port = - enc_to_dig_port(encoder); - bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; - bool scrambling = pipe_config->hdmi_scrambling; - - intel_hdmi_handle_sink_scrambling(intel_encoder, - conn_state->connector, - clock_ratio, scrambling); - - /* In HDMI/DVI mode, the port width, and swing/emphasis values - * are ignored so nothing special needs to be done besides - * enabling the port. - */ - I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->saved_port_bits | - DDI_BUF_CTL_ENABLE); - } else if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + if (port == PORT_A && INTEL_GEN(dev_priv) < 9) + intel_dp_stop_link_train(intel_dp); - if (port == PORT_A && INTEL_GEN(dev_priv) < 9) - intel_dp_stop_link_train(intel_dp); + intel_edp_backlight_on(crtc_state, conn_state); + intel_psr_enable(intel_dp, crtc_state); + intel_edp_drrs_enable(intel_dp, crtc_state); - intel_edp_backlight_on(pipe_config, conn_state); - intel_psr_enable(intel_dp, pipe_config); - intel_edp_drrs_enable(intel_dp, pipe_config); - } + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} - if (pipe_config->has_audio) - intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); +static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum port port = intel_ddi_get_encoder_port(encoder); + + intel_hdmi_handle_sink_scrambling(encoder, + conn_state->connector, + crtc_state->hdmi_high_tmds_clock_ratio, + crtc_state->hdmi_scrambling); + + /* In HDMI/DVI mode, the port width, and swing/emphasis values + * are ignored so nothing special needs to be done besides + * enabling the port. 
+ */ + I915_WRITE(DDI_BUF_CTL(port), + dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE); + + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); } -static void intel_disable_ddi(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void intel_enable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - int type = intel_encoder->type; + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_enable_ddi_hdmi(encoder, crtc_state, conn_state); + else + intel_enable_ddi_dp(encoder, crtc_state, conn_state); +} + +static void intel_disable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); if (old_crtc_state->has_audio) - intel_audio_codec_disable(intel_encoder); + intel_audio_codec_disable(encoder); - if (type == INTEL_OUTPUT_HDMI) { - intel_hdmi_handle_sink_scrambling(intel_encoder, - old_conn_state->connector, - false, false); - } + intel_edp_drrs_disable(intel_dp, old_crtc_state); + intel_psr_disable(intel_dp, old_crtc_state); + intel_edp_backlight_off(old_conn_state); +} + +static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder); - if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + intel_hdmi_handle_sink_scrambling(encoder, + old_conn_state->connector, + false, false); +} - intel_edp_drrs_disable(intel_dp, old_crtc_state); - intel_psr_disable(intel_dp, old_crtc_state); - intel_edp_backlight_off(old_conn_state); - } +static void intel_disable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state); + else + intel_disable_ddi_dp(encoder, old_crtc_state, old_conn_state); } static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7e91dc9a0fcf..f4a9a182868f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2847,7 +2847,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, if (intel_plane_ggtt_offset(state) == plane_config->base) { fb = c->primary->fb; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); goto valid_fb; } } @@ -2878,7 +2878,7 @@ valid_fb: intel_crtc->pipe, PTR_ERR(intel_state->vma)); intel_state->vma = NULL; - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); return; } @@ -2899,7 +2899,7 @@ valid_fb: if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; @@ -3289,7 +3289,6 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - struct intel_crtc *crtc = 
to_intel_crtc(crtc_state->base.crtc); const struct drm_framebuffer *fb = plane_state->base.fb; enum plane plane = primary->plane; u32 linear_offset; @@ -3298,16 +3297,14 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, int x = plane_state->main.x; int y = plane_state->main.y; unsigned long irqflags; + u32 dspaddr_offset; linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) - crtc->dspaddr_offset = plane_state->main.offset; + dspaddr_offset = plane_state->main.offset; else - crtc->dspaddr_offset = linear_offset; - - crtc->adjusted_x = x; - crtc->adjusted_y = y; + dspaddr_offset = linear_offset; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3333,18 +3330,18 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE_FW(DSPLINOFF(plane), linear_offset); } else { I915_WRITE_FW(DSPADDR(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); } POSTING_READ_FW(reg); @@ -3544,100 +3541,6 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } -static void skylake_update_primary_plane(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = plane->id; - enum pipe pipe = plane->pipe; - u32 plane_ctl = plane_state->ctl; - unsigned int rotation = plane_state->base.rotation; - u32 stride = skl_plane_stride(fb, 0, rotation); - u32 aux_stride = skl_plane_stride(fb, 1, rotation); - u32 surf_addr = plane_state->main.offset; - int scaler_id = plane_state->scaler_id; - int src_x = plane_state->main.x; - int src_y = plane_state->main.y; - int src_w = drm_rect_width(&plane_state->base.src) >> 16; - int src_h = drm_rect_height(&plane_state->base.src) >> 16; - int dst_x = plane_state->base.dst.x1; - int dst_y = plane_state->base.dst.y1; - int dst_w = drm_rect_width(&plane_state->base.dst); - int dst_h = drm_rect_height(&plane_state->base.dst); - unsigned long irqflags; - - /* Sizes are 0 based */ - src_w--; - src_h--; - dst_w--; - dst_h--; - - crtc->dspaddr_offset = surf_addr; - - crtc->adjusted_x = src_x; - crtc->adjusted_y = src_y; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - PLANE_COLOR_PIPE_GAMMA_ENABLE | - PLANE_COLOR_PIPE_CSC_ENABLE | - PLANE_COLOR_PLANE_GAMMA_DISABLE); - } - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl); - I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); - I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); - I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); - I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), - (plane_state->aux.offset - surf_addr) | aux_stride); - I915_WRITE_FW(PLANE_AUX_OFFSET(pipe, plane_id), - (plane_state->aux.y << 16) | plane_state->aux.x); - - if (scaler_id >= 0) { - 
uint32_t ps_ctrl = 0; - - WARN_ON(!dst_w || !dst_h); - ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane_id) | - crtc_state->scaler_state.scalers[scaler_id].mode; - I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); - I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (dst_x << 16) | dst_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (dst_w << 16) | dst_h); - I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); - } else { - I915_WRITE_FW(PLANE_POS(pipe, plane_id), (dst_y << 16) | dst_x); - } - - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); - - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - -static void skylake_disable_primary_plane(struct intel_plane *primary, - struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - enum plane_id plane_id = primary->id; - enum pipe pipe = primary->pipe; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0); - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0); - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - static int __intel_display_resume(struct drm_device *dev, struct drm_atomic_state *state, @@ -3773,6 +3676,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -6139,6 +6043,19 @@ struct intel_connector *intel_connector_alloc(void) return connector; } +/* + * Free the bits allocated by intel_connector_alloc. + * This should only be used after intel_connector_alloc has returned + * successfully, and before drm_connector_init returns successfully. + * Otherwise the destroy callbacks for the connector and the state should + * take care of proper cleanup/free + */ +void intel_connector_free(struct intel_connector *connector) +{ + kfree(to_intel_digital_connector_state(connector->base.state)); + kfree(connector); +} + /* Simple connector->get_hw_state implementation for encoders that support only * one connector and no cloning and hence the encoder state determines the state * of the connector. */ @@ -6522,11 +6439,9 @@ static void i9xx_update_pll_dividers(struct intel_crtc *crtc, crtc_state->dpll_hw_state.fp0 = fp; - crtc->lowfreq_avail = false; if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && reduced_clock) { crtc_state->dpll_hw_state.fp1 = fp2; - crtc->lowfreq_avail = true; } else { crtc_state->dpll_hw_state.fp1 = fp; } @@ -7221,15 +7136,6 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) } } - if (HAS_PIPE_CXSR(dev_priv)) { - if (intel_crtc->lowfreq_avail) { - DRM_DEBUG_KMS("enabling CxSR downclocking\n"); - pipeconf |= PIPECONF_CXSR_DOWNCLOCK; - } else { - DRM_DEBUG_KMS("disabling CxSR downclocking\n"); - } - } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { if (INTEL_GEN(dev_priv) < 4 || intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_SDVO)) @@ -8365,8 +8271,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - crtc->lowfreq_avail = false; - /* CPU eDP is the only output that doesn't need a PCH PLL of its own. 
*/ if (!crtc_state->has_pch_encoder) return 0; @@ -9025,8 +8929,6 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, } } - crtc->lowfreq_avail = false; - return 0; } @@ -9846,7 +9748,7 @@ mode_fits_in_fbdev(struct drm_device *dev, if (obj->base.size < mode->vdisplay * fb->pitches[0]) return NULL; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); return fb; #else return NULL; @@ -10027,7 +9929,7 @@ found: if (ret) goto fail; - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) @@ -10662,6 +10564,52 @@ intel_dump_m_n_config(struct intel_crtc_state *pipe_config, char *id, m_n->link_m, m_n->link_n, m_n->tu); } +#define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x + +static const char * const output_type_str[] = { + OUTPUT_TYPE(UNUSED), + OUTPUT_TYPE(ANALOG), + OUTPUT_TYPE(DVO), + OUTPUT_TYPE(SDVO), + OUTPUT_TYPE(LVDS), + OUTPUT_TYPE(TVOUT), + OUTPUT_TYPE(HDMI), + OUTPUT_TYPE(DP), + OUTPUT_TYPE(EDP), + OUTPUT_TYPE(DSI), + OUTPUT_TYPE(UNKNOWN), + OUTPUT_TYPE(DP_MST), +}; + +#undef OUTPUT_TYPE + +static void snprintf_output_types(char *buf, size_t len, + unsigned int output_types) +{ + char *str = buf; + int i; + + str[0] = '\0'; + + for (i = 0; i < ARRAY_SIZE(output_type_str); i++) { + int r; + + if ((output_types & BIT(i)) == 0) + continue; + + r = snprintf(str, len, "%s%s", + str != buf ? "," : "", output_type_str[i]); + if (r >= len) + break; + str += r; + len -= r; + + output_types &= ~BIT(i); + } + + WARN_ON_ONCE(output_types != 0); +} + static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, const char *context) @@ -10672,10 +10620,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_plane *intel_plane; struct intel_plane_state *state; struct drm_framebuffer *fb; + char buf[64]; DRM_DEBUG_KMS("[CRTC:%d:%s]%s\n", crtc->base.base.id, crtc->base.name, context); + snprintf_output_types(buf, sizeof(buf), pipe_config->output_types); + DRM_DEBUG_KMS("output_types: %s (0x%x)\n", + buf, pipe_config->output_types); + DRM_DEBUG_KMS("cpu_transcoder: %s, pipe bpp: %i, dithering: %i\n", transcoder_name(pipe_config->cpu_transcoder), pipe_config->pipe_bpp, pipe_config->dither); @@ -13229,8 +13182,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13239,8 +13192,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) else modifiers = skl_format_modifiers_noccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -14398,8 +14351,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - - intel_init_clock_gating(dev_priv); } /* @@ -15111,6 +15062,15 @@ intel_modeset_setup_hw_state(struct 
drm_device *dev, struct intel_encoder *encoder; int i; + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } + intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ @@ -15208,6 +15168,8 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); + intel_init_clock_gating(dev_priv); + intel_setup_overlay(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b0f446b68f42..aa75f55eeb61 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1007,7 +1007,7 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, else precharge = 5; - if (IS_BROADWELL(dev_priv) && intel_dig_port->port == PORT_A) + if (IS_BROADWELL(dev_priv)) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -1032,7 +1032,7 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_DONE | (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) | DP_AUX_CH_CTL_TIME_OUT_ERROR | - DP_AUX_CH_CTL_TIME_OUT_1600us | + DP_AUX_CH_CTL_TIME_OUT_MAX | DP_AUX_CH_CTL_RECEIVE_ERROR | (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | @@ -1832,6 +1832,8 @@ found: if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); + intel_psr_compute_config(intel_dp, pipe_config); + return true; } @@ -3153,9 +3155,7 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum port port = dp_to_dig_port(intel_dp)->port; - if (IS_GEN9_LP(dev_priv)) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - else if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 3c131e2544cf..772521440a9f 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -454,32 +454,52 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo struct intel_dp *intel_dp = container_of(mgr, struct intel_dp, mst_mgr); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_connector *intel_connector; struct drm_connector *connector; - int i; + enum pipe pipe; + int ret; intel_connector = intel_connector_alloc(); if (!intel_connector) return NULL; connector = &intel_connector->base; - drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + intel_connector_free(intel_connector); + return NULL; + } + drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); intel_connector->get_hw_state = intel_dp_mst_get_hw_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; - for (i = PIPE_A; i <= PIPE_C; i++) { - drm_mode_connector_attach_encoder(&intel_connector->base, - &intel_dp->mst_encoders[i]->base.base); + for_each_pipe(dev_priv, pipe) { + struct drm_encoder 
*enc = + &intel_dp->mst_encoders[pipe]->base.base; + + ret = drm_mode_connector_attach_encoder(&intel_connector->base, + enc); + if (ret) + goto err; } drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); - drm_mode_connector_set_path_property(connector, pathprop); + ret = drm_mode_connector_set_path_property(connector, pathprop); + if (ret) + goto err; + return connector; + +err: + drm_connector_cleanup(connector); + return NULL; } static void intel_dp_register_mst_connector(struct drm_connector *connector) @@ -569,11 +589,12 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum static bool intel_dp_create_fake_mst_encoders(struct intel_digital_port *intel_dig_port) { - int i; struct intel_dp *intel_dp = &intel_dig_port->dp; + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum pipe pipe; - for (i = PIPE_A; i <= PIPE_C; i++) - intel_dp->mst_encoders[i] = intel_dp_create_fake_mst_encoder(intel_dig_port, i); + for_each_pipe(dev_priv, pipe) + intel_dp->mst_encoders[pipe] = intel_dp_create_fake_mst_encoder(intel_dig_port, pipe); return true; } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index a2a3d93d67bd..df808a94c511 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1996,7 +1996,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, /* 3. Configure DPLL_CFGCR0 */ /* Avoid touch CFGCR1 if HDMI mode is not enabled */ - if (pll->state.hw_state.cfgcr0 & DPLL_CTRL1_HDMI_MODE(pll->id)) { + if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { val = pll->state.hw_state.cfgcr1; I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); /* 4. Reab back to ensure writes completed */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b87946dcc53f..47d022d48718 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -718,6 +718,9 @@ struct intel_crtc_state { struct intel_link_m_n dp_m2_n2; bool has_drrs; + bool has_psr; + bool has_psr2; + /* * Frequence the dpll for the port should run at. Differs from the * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also @@ -800,18 +803,10 @@ struct intel_crtc { * some outputs connected to this crtc. */ bool active; - bool lowfreq_avail; u8 plane_ids_mask; unsigned long long enabled_power_domains; struct intel_overlay *overlay; - /* Display surface base address adjustement for pageflips. Note that on - * gen4+ this only adjusts up to a tile, offsets within a tile are - * handled in the hw itself (with the TILEOFF register). 
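/*
 * The MST connector path above is the goto-unwind idiom: once
 * drm_connector_init() has succeeded, every later failure must go
 * through drm_connector_cleanup() instead of a plain free. Condensed
 * sketch of the shape (error handling only, details elided):
 */
	ret = drm_connector_init(dev, connector,
				 &intel_dp_mst_connector_funcs,
				 DRM_MODE_CONNECTOR_DisplayPort);
	if (ret) {
		/* nothing registered yet, freeing the allocation suffices */
		intel_connector_free(intel_connector);
		return NULL;
	}

	ret = drm_mode_connector_set_path_property(connector, pathprop);
	if (ret)
		goto err; /* connector is live, needs full teardown */

	return connector;

err:
	drm_connector_cleanup(connector);
	return NULL;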
*/ - u32 dspaddr_offset; - int adjusted_x; - int adjusted_y; - struct intel_crtc_state *config; /* global reset count when the last flip was submitted */ @@ -1066,7 +1061,7 @@ struct intel_digital_port { void (*write_infoframe)(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len); void (*set_infoframes)(struct drm_encoder *encoder, bool enable, @@ -1360,6 +1355,7 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); +void intel_connector_free(struct intel_connector *connector); bool intel_connector_get_hw_state(struct intel_connector *connector); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); @@ -1764,6 +1760,8 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1923,6 +1921,10 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state); void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state); +void skl_update_plane(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 66bbedc5fa01..83f15848098a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1751,42 +1751,13 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) else intel_encoder->crtc_mask = BIT(PIPE_B); - if (dev_priv->vbt.dsi.config->dual_link) { + if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); - - switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_backlight_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - - switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_cabc_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - } else { + else intel_dsi->ports = BIT(port); - intel_dsi->dcs_backlight_ports = BIT(port); - intel_dsi->dcs_cabc_ports = BIT(port); - } - if (!dev_priv->vbt.dsi.config->cabc_supported) - intel_dsi->dcs_cabc_ports = 0; + intel_dsi->dcs_backlight_ports = dev_priv->vbt.dsi.bl_ports; + intel_dsi->dcs_cabc_ports = dev_priv->vbt.dsi.cabc_ports; /* Create a DSI host (and a device) for each port. 
*/ for_each_dsi_port(port, intel_dsi->ports) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a59b2a30ff5a..ab5bf4e2e28e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include <drm/drm_print.h> #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -386,10 +387,6 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -398,6 +395,10 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + /* Older GVT emulation depends upon intercepting CSB mmio */ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } @@ -1252,9 +1253,12 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) } /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } /* WaToEnableHwFixForPushConstHWBug:bxt */ if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) @@ -1544,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; - /* Both ports drained, no more ELSP submission? */ - if (port_request(&engine->execlists.port[0])) + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) return false; /* ELSP is empty, but there are ready requests? */ @@ -1622,8 +1626,10 @@ static void print_request(struct drm_printer *m, struct drm_i915_gem_request *rq, const char *prefix) { - drm_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, - rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, + drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + rq->global_seqno, + i915_gem_request_completed(rq) ? "!" 
: "", + rq->ctx->hw_id, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), rq->timeline->common->name); @@ -1631,8 +1637,9 @@ static void print_request(struct drm_printer *m, void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_gpu_error *error = &engine->i915->gpu_error; + struct intel_breadcrumbs * const b = &engine->breadcrumbs; + const struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_gpu_error * const error = &engine->i915->gpu_error; struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *rq; struct rb_node *rb; @@ -1696,7 +1703,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) if (i915_modparams.enable_execlists) { const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - struct intel_engine_execlists * const execlists = &engine->execlists; u32 ptr, read, write; unsigned int idx; @@ -1743,18 +1749,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) idx); } } + drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); rcu_read_unlock(); - - spin_lock_irq(&engine->timeline->lock); - for (rb = execlists->first; rb; rb = rb_next(rb)) { - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); - - list_for_each_entry(rq, &p->requests, - priotree.link) - print_request(m, rq, "\t\tQ "); - } - spin_unlock_irq(&engine->timeline->lock); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", I915_READ(RING_PP_DIR_BASE(engine))); @@ -1764,6 +1760,18 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) I915_READ(RING_PP_DIR_DCLV(engine))); } + spin_lock_irq(&engine->timeline->lock); + list_for_each_entry(rq, &engine->timeline->requests, link) + print_request(m, rq, "\t\tE "); + for (rb = execlists->first; rb; rb = rb_next(rb)) { + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, priotree.link) + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->timeline->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 8e3a05505f49..1a0f5e0c8d10 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -69,9 +69,9 @@ static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) * address we program because it starts at the real start of the buffer, so we * have to take this into consideration here. 
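/*
 * With adjusted_x/y moving off the crtc, FBC snapshots the plane's
 * source Y and the tile-aligned Y it actually programs at atomic-check
 * time, and the fence Y offset becomes pure state-cache arithmetic.
 * Worked example with assumed numbers (not taken from the patch):
 * src.y1 >> 16 == 100 and a tile-aligned main.y == 96 give an offset
 * of 4 scanlines:
 *
 *	cache->plane.y          = plane_state->base.src.y1 >> 16;   // 100
 *	cache->plane.adjusted_y = plane_state->main.y;               //  96
 *	fence_y_offset = cache->plane.y - cache->plane.adjusted_y;   //   4
 */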
*/ -static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc) +static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) { - return crtc->base.y - crtc->adjusted_y; + return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y; } /* @@ -727,8 +727,8 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) intel_fbc_get_plane_source_size(&fbc->state_cache, &effective_w, &effective_h); - effective_w += crtc->adjusted_x; - effective_h += crtc->adjusted_y; + effective_w += fbc->state_cache.plane.adjusted_x; + effective_h += fbc->state_cache.plane.adjusted_y; return effective_w <= max_w && effective_h <= max_h; } @@ -757,6 +757,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; cache->plane.visible = plane_state->base.visible; + cache->plane.adjusted_x = plane_state->main.x; + cache->plane.adjusted_y = plane_state->main.y; + cache->plane.y = plane_state->base.src.y1 >> 16; if (!cache->plane.visible) return; @@ -888,7 +891,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->crtc.pipe = crtc->pipe; params->crtc.plane = crtc->plane; - params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc); + params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index f2bb8116227c..b8af35187d22 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -189,7 +189,7 @@ static int intelfb_create(struct drm_fb_helper *helper, " releasing it\n", intel_fb->base.width, intel_fb->base.height, sizes->fb_width, sizes->fb_height); - drm_framebuffer_unreference(&intel_fb->base); + drm_framebuffer_put(&intel_fb->base); intel_fb = ifbdev->fb = NULL; } if (!intel_fb || WARN_ON(!intel_fb->obj)) { @@ -627,7 +627,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, ifbdev->preferred_bpp = fb->base.format->cpp[0] * 8; ifbdev->fb = fb; - drm_framebuffer_reference(&ifbdev->fb->base); + drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ for_each_crtc(dev, crtc) { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 9e18c4fb9909..10037c0fdf95 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -67,6 +67,99 @@ void intel_guc_init_early(struct intel_guc *guc) guc->notify = gen8_guc_raise_irq; } +static u32 get_gt_type(struct drm_i915_private *dev_priv) +{ + /* XXX: GT type based on PCI device ID? field seems unused by fw */ + return 0; +} + +static u32 get_core_family(struct drm_i915_private *dev_priv) +{ + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { + case 9: + return GUC_CORE_FAMILY_GEN9; + + default: + MISSING_CASE(gen); + return GUC_CORE_FAMILY_UNKNOWN; + } +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. 
+ */ +void intel_guc_init_params(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 params[GUC_CTL_MAX_DWORDS]; + int i; + + memset(params, 0, sizeof(params)); + + params[GUC_CTL_DEVICE_INFO] |= + (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | + (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); + + /* + * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one + * second. This ARAT is calculated by: + * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 + */ + params[GUC_CTL_ARAT_HIGH] = 0; + params[GUC_CTL_ARAT_LOW] = 100000000; + + params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; + + params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | + GUC_CTL_VCS2_ENABLED; + + params[GUC_CTL_LOG_PARAMS] = guc->log.flags; + + if (i915_modparams.guc_log_level >= 0) { + params[GUC_CTL_DEBUG] = + i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; + } else { + params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; + } + + /* If GuC submission is enabled, set up additional parameters here */ + if (i915_modparams.enable_guc_submission) { + u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; + u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; + + params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; + params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; + + pgs >>= PAGE_SHIFT; + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + + /* Unmask this bit to enable the GuC's internal scheduler */ + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + } + + /* + * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and + * they are power context saved so it's ok to release forcewake + * when we are done here and take it again at xfer time. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); + + I915_WRITE(SOFT_SCRATCH(0), 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); +} + int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) { WARN(1, "Unexpected send: action=%#x\n", *action); @@ -263,3 +356,14 @@ err: i915_gem_object_put(obj); return vma; } + +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) +{ + u32 wopcm_size = GUC_WOPCM_TOP; + + /* On BXT, the top of WOPCM is reserved for RC6 context */ + if (IS_GEN9_LP(dev_priv)) + wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; + + return wopcm_size; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index aa9a7b55be6e..418450b1ae27 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -26,6 +26,7 @@ #define _INTEL_GUC_H_ #include "intel_uncore.h" +#include "intel_guc_fw.h" #include "intel_guc_fwif.h" #include "intel_guc_ct.h" #include "intel_guc_log.h" @@ -33,6 +34,11 @@ #include "i915_guc_reg.h" #include "i915_vma.h" +/* + * Top level structure of GuC. It handles firmware loading and manages client + * pool and doorbells. intel_guc owns an i915_guc_client to replace the legacy + * ExecList submission. + */ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; @@ -83,6 +89,12 @@ static inline void intel_guc_notify(struct intel_guc *guc) guc->notify(guc); } +/* + * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), + * which is reserved for Boot ROM, SRAM and WOPCM. 
Currently this top address is + 512K. In order to exclude 0-512K address space from GGTT, all gfx objects + * used by the GuC are pinned with PIN_OFFSET_BIAS along with the size of WOPCM. + */ static inline u32 guc_ggtt_offset(struct i915_vma *vma) { u32 offset = i915_ggtt_offset(vma); @@ -95,6 +107,7 @@ void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); +void intel_guc_init_params(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); @@ -102,9 +115,6 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct drm_i915_private *dev_priv); int intel_guc_resume(struct drm_i915_private *dev_priv); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); - -int intel_guc_select_fw(struct intel_guc *guc); -int intel_guc_init_hw(struct intel_guc *guc); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); #endif diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_fw.c index c7a800a3798d..ef67a36354c5 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -26,31 +26,9 @@ * Dave Gordon <david.s.gordon@intel.com> * Alex Dai <yu.dai@intel.com> */ -#include "i915_drv.h" -#include "intel_uc.h" -/** - * DOC: GuC-specific firmware loader - * - * intel_guc: - * Top level structure of guc. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy - * ExecList submission. - * - * Firmware versioning: - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. - * The firmware installation package will install (symbolic link) proper version - * of firmware. - * - * GuC address space: - * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is - * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. - * - */ +#include "intel_guc_fw.h" +#include "i915_drv.h" #define SKL_FW_MAJOR 6 #define SKL_FW_MINOR 1 @@ -78,88 +56,45 @@ MODULE_FIRMWARE(I915_KBL_GUC_UCODE); #define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) - -static u32 get_gttype(struct drm_i915_private *dev_priv) -{ - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); - - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} - -/* - * Initialise the GuC parameter block before starting the firmware - * transfer. These parameters are read by the firmware on startup - * and cannot be changed thereafter. 
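/*
 * The guc_ggtt_offset() contract above is established at pin time:
 * every object the GuC must address is pinned with an offset bias so
 * it lands above WOPCM_TOP in the GGTT. This is the pinning call the
 * firmware-transfer paths in this diff rely on:
 */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	/* from here on, i915_ggtt_offset(vma) >= GUC_WOPCM_TOP holds */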
+/** + * intel_guc_fw_select() - selects GuC firmware for uploading + * + * @guc: intel_guc struct + * + * Return: zero when the firmware is known, non-zero otherwise */ -static void guc_params_init(struct drm_i915_private *dev_priv) +int intel_guc_fw_select(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 params[GUC_CTL_MAX_DWORDS]; - int i; - - memset(&params, 0, sizeof(params)); - - params[GUC_CTL_DEVICE_INFO] |= - (get_gttype(dev_priv) << GUC_CTL_GTTYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_COREFAMILY_SHIFT); - - /* - * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one - * second. This ARAR is calculated by: - * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 - */ - params[GUC_CTL_ARAT_HIGH] = 0; - params[GUC_CTL_ARAT_LOW] = 100000000; - - params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; - - params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | - GUC_CTL_VCS2_ENABLED; - - params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - - if (i915_modparams.guc_log_level >= 0) { - params[GUC_CTL_DEBUG] = - i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; - } else - params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - - /* If GuC submission is enabled, set up additional parameters here */ - if (i915_modparams.enable_guc_submission) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); - u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; - - params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; - params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; - - pgs >>= PAGE_SHIFT; - params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | - (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + struct drm_i915_private *dev_priv = guc_to_i915(guc); - params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); - /* Unmask this bit to enable the GuC's internal scheduler */ - params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + if (i915_modparams.guc_firmware_path) { + guc->fw.path = i915_modparams.guc_firmware_path; + guc->fw.major_ver_wanted = 0; + guc->fw.minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + guc->fw.path = I915_SKL_GUC_UCODE; + guc->fw.major_ver_wanted = SKL_FW_MAJOR; + guc->fw.minor_ver_wanted = SKL_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + guc->fw.path = I915_BXT_GUC_UCODE; + guc->fw.major_ver_wanted = BXT_FW_MAJOR; + guc->fw.minor_ver_wanted = BXT_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + guc->fw.path = I915_KBL_GUC_UCODE; + guc->fw.major_ver_wanted = KBL_FW_MAJOR; + guc->fw.minor_ver_wanted = KBL_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + guc->fw.path = I915_GLK_GUC_UCODE; + guc->fw.major_ver_wanted = GLK_FW_MAJOR; + guc->fw.minor_ver_wanted = GLK_FW_MINOR; + } else { + DRM_ERROR("No GuC firmware known for platform with GuC!\n"); + return -ENOENT; } - I915_WRITE(SOFT_SCRATCH(0), 0); - - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); + return 0; } /* @@ -250,38 +185,16 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, return ret; } -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) -{ - u32 wopcm_size = GUC_WOPCM_TOP; - - /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_GEN9_LP(dev_priv)) - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; - - return wopcm_size; -} - /* * Load the GuC firmware blob into the MinuteIA. 
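/*
 * Note how intel_guc_fw_select() encodes "accept any version" by
 * zeroing major/minor_ver_wanted when a firmware path is forced via
 * i915.guc_firmware_path. The common fetch code is then expected to
 * skip the version match, along these lines (a sketch, not the exact
 * intel_uc_fw_fetch() text):
 */
	if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
		/* user-forced path: any blob version is accepted */
	} else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
		   uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
		/* reject: blob does not meet the per-platform minimum */
	}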
*/ -static int guc_ucode_xfer(struct drm_i915_private *dev_priv) +static int guc_ucode_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) { - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - struct i915_vma *vma; + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - ret = i915_gem_object_set_to_gtt_domain(guc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(guc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -312,23 +225,15 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); } - guc_params_init(dev_priv); - ret = guc_ucode_xfer_dma(dev_priv, vma); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - return ret; } /** - * intel_guc_init_hw() - finish preparing the GuC for activity + * intel_guc_fw_upload() - finish preparing the GuC for activity * @guc: intel_guc structure * * Called during driver loading and also after a GPU reset. @@ -340,78 +245,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) * * Return: non-zero code on error */ -int intel_guc_init_hw(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - const char *fw_path = guc->fw.path; - int ret; - - DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n", - fw_path, - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - if (guc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -EIO; - - guc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - ret = guc_ucode_xfer(dev_priv); - - if (ret) - return -EAGAIN; - - guc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; - - DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", - i915_modparams.enable_guc_submission ? 
"submission enabled" : - "loaded", - guc->fw.path, - guc->fw.major_ver_found, guc->fw.minor_ver_found); - - return 0; -} - -/** - * intel_guc_select_fw() - selects GuC firmware for loading - * @guc: intel_guc struct - * - * Return: zero when we know firmware, non-zero in other case - */ -int intel_guc_select_fw(struct intel_guc *guc) +int intel_guc_fw_upload(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); - - if (i915_modparams.guc_firmware_path) { - guc->fw.path = i915_modparams.guc_firmware_path; - guc->fw.major_ver_wanted = 0; - guc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - guc->fw.path = I915_SKL_GUC_UCODE; - guc->fw.major_ver_wanted = SKL_FW_MAJOR; - guc->fw.minor_ver_wanted = SKL_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - guc->fw.path = I915_BXT_GUC_UCODE; - guc->fw.major_ver_wanted = BXT_FW_MAJOR; - guc->fw.minor_ver_wanted = BXT_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - guc->fw.path = I915_KBL_GUC_UCODE; - guc->fw.major_ver_wanted = KBL_FW_MAJOR; - guc->fw.minor_ver_wanted = KBL_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - guc->fw.path = I915_GLK_GUC_UCODE; - guc->fw.major_ver_wanted = GLK_FW_MAJOR; - guc->fw.minor_ver_wanted = GLK_FW_MINOR; - } else { - DRM_ERROR("No GuC firmware known for platform with GuC!\n"); - return -ENOENT; - } - - return 0; + return intel_uc_fw_upload(&guc->fw, guc_ucode_xfer); } diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h new file mode 100644 index 000000000000..023f5baa9dd6 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_fw.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_GUC_FW_H_ +#define _INTEL_GUC_FW_H_ + +struct intel_guc; + +int intel_guc_fw_select(struct intel_guc *guc); +int intel_guc_fw_upload(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 1c0a2a3de121..80c507435458 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -82,8 +82,8 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GTTYPE_SHIFT 0 -#define GUC_CTL_COREFAMILY_SHIFT 7 +#define GUC_CTL_GT_TYPE_SHIFT 0 +#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e6f8f30ce7bd..5132dc814788 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -70,7 +70,7 @@ static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base); } -static u32 g4x_infoframe_index(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_index(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -85,7 +85,7 @@ static u32 g4x_infoframe_index(enum hdmi_infoframe_type type) } } -static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_enable(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -100,9 +100,11 @@ static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) } } -static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) +static u32 hsw_infoframe_enable(unsigned int type) { switch (type) { + case DP_SDP_VSC: + return VIDEO_DIP_ENABLE_VSC_HSW; case HDMI_INFOFRAME_TYPE_AVI: return VIDEO_DIP_ENABLE_AVI_HSW; case HDMI_INFOFRAME_TYPE_SPD: @@ -118,10 +120,12 @@ static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) static i915_reg_t hsw_dip_data_reg(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder, - enum hdmi_infoframe_type type, + unsigned int type, int i) { switch (type) { + case DP_SDP_VSC: + return HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_AVI: return HSW_TVIDEO_DIP_AVI_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_SPD: @@ -136,7 +140,7 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, static void g4x_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -191,7 +195,7 @@ static bool g4x_infoframe_enabled(struct drm_encoder *encoder, static void ibx_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -251,7 +255,7 @@ static bool ibx_infoframe_enabled(struct drm_encoder *encoder, static void cpt_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -309,7 +313,7 @@ static bool cpt_infoframe_enabled(struct drm_encoder *encoder, static void vlv_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -368,7 +372,7 @@ static bool vlv_infoframe_enabled(struct 
drm_encoder *encoder, static void hsw_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -377,6 +381,8 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); i915_reg_t data_reg; + int data_size = type == DP_SDP_VSC ? + VIDEO_DIP_VSC_DATA_SIZE : VIDEO_DIP_DATA_SIZE; int i; u32 val = I915_READ(ctl_reg); @@ -392,7 +398,7 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, data++; } /* Write every possible data byte to force correct ECC calculation. */ - for (; i < VIDEO_DIP_DATA_SIZE; i += 4) + for (; i < data_size; i += 4) I915_WRITE(hsw_dip_data_reg(dev_priv, cpu_transcoder, type, i >> 2), 0); mmiowb(); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 4b4cf56d29ad..c8a48cbc2b7d 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -78,6 +78,42 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); GLK_HUC_FW_MINOR, GLK_BLD_NUM) /** + * intel_huc_select_fw() - selects HuC firmware for loading + * @huc: intel_huc struct + */ +void intel_huc_select_fw(struct intel_huc *huc) +{ + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); + + if (i915_modparams.huc_firmware_path) { + huc->fw.path = i915_modparams.huc_firmware_path; + huc->fw.major_ver_wanted = 0; + huc->fw.minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc->fw.path = I915_SKL_HUC_UCODE; + huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + huc->fw.path = I915_BXT_HUC_UCODE; + huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + huc->fw.path = I915_KBL_HUC_UCODE; + huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + huc->fw.path = I915_GLK_HUC_UCODE; + huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; + } else { + DRM_ERROR("No HuC firmware known for platform with HuC!\n"); + return; + } +} + +/** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device * @@ -85,26 +121,15 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); * * Return: 0 on success, non-zero on failure */ -static int huc_ucode_xfer(struct drm_i915_private *dev_priv) +static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) { - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - struct i915_vma *vma; + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); unsigned long offset = 0; u32 size; int ret; - ret = i915_gem_object_set_to_gtt_domain(huc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(huc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -135,52 +160,10 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) 
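/*
 * huc_ucode_xfer() (like its GuC counterpart) now takes the generic
 * (uc_fw, vma) pair and recovers its device with container_of(), so
 * intel_uc_fw_upload() can own the object pinning and load-status
 * bookkeeping that the two paths previously duplicated:
 */
	struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
	struct drm_i915_private *dev_priv = huc_to_i915(huc);

	/* and the per-device entry point shrinks to a single call: */
	intel_uc_fw_upload(&huc->fw, huc_ucode_xfer);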
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - return ret; } /** - * intel_huc_select_fw() - selects HuC firmware for loading - * @huc: intel_huc struct - */ -void intel_huc_select_fw(struct intel_huc *huc) -{ - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); - - if (i915_modparams.huc_firmware_path) { - huc->fw.path = i915_modparams.huc_firmware_path; - huc->fw.major_ver_wanted = 0; - huc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc->fw.path = I915_SKL_HUC_UCODE; - huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc->fw.path = I915_BXT_HUC_UCODE; - huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc->fw.path = I915_KBL_HUC_UCODE; - huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - huc->fw.path = I915_GLK_HUC_UCODE; - huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; - } else { - DRM_ERROR("No HuC firmware known for platform with HuC!\n"); - return; - } -} - -/** * intel_huc_init_hw() - load HuC uCode to device * @huc: intel_huc structure * @@ -194,33 +177,7 @@ void intel_huc_select_fw(struct intel_huc *huc) */ void intel_huc_init_hw(struct intel_huc *huc) { - struct drm_i915_private *dev_priv = huc_to_i915(huc); - int err; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return; - - huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - err = huc_ucode_xfer(dev_priv); - - huc->fw.load_status = err ? - INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - - return; + intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); } /** diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fbfcf88d7fe3..d36e25607435 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -575,7 +575,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the state of the GPU is known (idle). 
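/*
 * Preemption bookkeeping in the execlists path now uses a dedicated
 * bit instead of the old bool: set when the preempt context is
 * injected, cleared by the CSB handler once preemption completes.
 * This lets intel_engine_is_idle() test all submission sources with
 * one read:
 */
	inject_preempt_context(engine);
	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);

	/* ... in the CSB irq handler, after the preempt event ... */
	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);

	/* ... and in intel_engine_is_idle() ... */
	if (READ_ONCE(engine->execlists.active))
		return false; /* user or preempt submission in flight */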
*/ inject_preempt_context(engine); - execlists->preempt = true; + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); goto unlock; } else { /* @@ -683,8 +684,10 @@ done: unlock: spin_unlock_irq(&engine->timeline->lock); - if (submit) + if (submit) { + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); + } } static void @@ -696,6 +699,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists) while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); + GEM_BUG_ON(!execlists->active); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); @@ -730,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; dma_fence_set_error(&rq->fence, -EIO); __i915_gem_request_submit(rq); @@ -793,7 +796,6 @@ static void intel_lrc_irq_handler(unsigned long data) &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); @@ -862,15 +864,21 @@ static void intel_lrc_irq_handler(unsigned long data) unwind_incomplete_requests(engine); spin_unlock_irq(&engine->timeline->lock); - GEM_BUG_ON(!execlists->preempt); - execlists->preempt = false; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)); + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); continue; } if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists->preempt) + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) continue; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -882,7 +890,6 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); trace_i915_gem_request_out(rq); - rq->priotree.priority = INT_MAX; i915_gem_request_put(rq); execlists_port_complete(execlists, port); @@ -893,6 +900,9 @@ static void intel_lrc_irq_handler(unsigned long data) /* After the final element, the hw should be idle */ GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -902,7 +912,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (!execlists->preempt) + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -1094,6 +1104,7 @@ execlists_context_pin(struct intel_engine_cs *engine, i915_ggtt_offset(ce->ring->vma); ce->state->obj->mm.dirty = true; + ce->state->obj->pin_global++; i915_gem_context_get(ctx); out: @@ -1121,6 +1132,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, intel_ring_unpin(ce->ring); + ce->state->obj->pin_global--; i915_gem_object_unpin_map(ce->state->obj); i915_vma_unpin(ce->state); @@ -1459,7 +1471,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; - 
execlists->preempt = false; + execlists->active = 0; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index beb9baaf2f2e..dcbc786479f9 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -56,7 +56,7 @@ static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon) struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; if (drm_lspcon_get_mode(adapter, ¤t_mode)) { - DRM_ERROR("Error reading LSPCON mode\n"); + DRM_DEBUG_KMS("Error reading LSPCON mode\n"); return DRM_LSPCON_MODE_INVALID; } return current_mode; @@ -68,16 +68,15 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode current_mode; current_mode = lspcon_get_current_mode(lspcon); - if (current_mode == mode || current_mode == DRM_LSPCON_MODE_INVALID) + if (current_mode == mode) goto out; DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode || - current_mode == DRM_LSPCON_MODE_INVALID, 100); + wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100); if (current_mode != mode) - DRM_DEBUG_KMS("LSPCON mode hasn't settled\n"); + DRM_ERROR("LSPCON mode hasn't settled\n"); out: DRM_DEBUG_KMS("Current LSPCON mode %s\n", @@ -133,6 +132,7 @@ static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) static bool lspcon_probe(struct intel_lspcon *lspcon) { + int retry; enum drm_dp_dual_mode_type adaptor_type; struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; enum drm_lspcon_mode expected_mode; @@ -141,10 +141,18 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) DRM_LSPCON_MODE_PCON : DRM_LSPCON_MODE_LS; /* Lets probe the adaptor and check its type */ - adaptor_type = drm_dp_dual_mode_detect(adapter); + for (retry = 0; retry < 6; retry++) { + if (retry) + usleep_range(500, 1000); + + adaptor_type = drm_dp_dual_mode_detect(adapter); + if (adaptor_type == DRM_DP_DUAL_MODE_LSPCON) + break; + } + if (adaptor_type != DRM_DP_DUAL_MODE_LSPCON) { DRM_DEBUG_KMS("No LSPCON detected, found %s\n", - drm_dp_get_dual_mode_type_name(adaptor_type)); + drm_dp_get_dual_mode_type_name(adaptor_type)); return false; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2fcff9788b6f..aa12a44e9a76 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3133,7 +3133,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc_state *newstate) { struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(newstate->base.state); + const struct intel_crtc_state *oldstate = + intel_atomic_get_old_crtc_state(intel_state, intel_crtc); + const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(to_i915(dev)); /* @@ -3142,6 +3146,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * and after the vblank. 
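/*
 * The intermediate watermarks are now merged against the old optimal
 * state fetched from the atomic transaction instead of mutable data
 * stashed in the crtc. The merge takes the safe side of each field;
 * roughly (the per-level max/AND details are assumed, not shown in
 * this hunk):
 */
	*a = newstate->wm.ilk.optimal;
	a->pipe_enabled |= b->pipe_enabled;	/* b: old optimal wm */
	a->sprites_enabled |= b->sprites_enabled;
	for (level = 0; level <= max_level; level++) {
		a->wm[level].enable &= b->wm[level].enable;
		a->wm[level].pri_val = max(a->wm[level].pri_val,
					   b->wm[level].pri_val);
	}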
*/ *a = newstate->wm.ilk.optimal; + if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) + return 0; + a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; @@ -5755,12 +5762,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->wm.wm_mutex); } +/* + * FIXME should probably kill this and improve + * the real watermark readout/sanitation instead + */ +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) +{ + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + + /* + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. + */ +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; + ilk_init_lp_watermarks(dev_priv); + for_each_crtc(dev, crtc) ilk_pipe_wm_get_hw_state(crtc); @@ -6591,7 +6616,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - uint32_t rc6_mask = 0; + u32 rc6_mode, rc6_mask = 0; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -6629,8 +6654,15 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); + GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -8200,18 +8232,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) } } -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) -{ - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} - static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; @@ -8245,8 +8265,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev_priv); - /* * Based on the document from hardware guys the following bits * should be set unconditionally in order to enable FBC. 
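/*
 * Both GEN8_L3SQCREG1 updates in this series (the helper here and the
 * bxt workaround earlier) replace a blind register write with a
 * read-modify-write, so only the priority-credit field changes and
 * the remaining bits keep their hardware defaults:
 */
	val = I915_READ(GEN8_L3SQCREG1);
	val &= ~L3_PRIO_CREDITS_MASK;		/* clear just our field */
	val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
	val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
	I915_WRITE(GEN8_L3SQCREG1, val);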
@@ -8359,8 +8377,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -8477,14 +8493,17 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, int high_prio_credits) { u32 misccpctl; + u32 val; /* WaTempDisableDOPClkGating:bdw */ misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GEN8_L3SQCREG1, - L3_GENERAL_PRIO_CREDITS(general_prio_credits) | - L3_HIGH_PRIO_CREDITS(high_prio_credits)); + val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); + val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); + I915_WRITE(GEN8_L3SQCREG1, val); /* * Wait at least 100 clocks before re-enabling clock gating. @@ -8584,8 +8603,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) I915_GTT_PAGE_SIZE_2M); enum pipe pipe; - ilk_init_lp_watermarks(dev_priv); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -8636,8 +8653,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - ilk_init_lp_watermarks(dev_priv); - /* L3 caching of data atomics doesn't work -- disable it. */ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); I915_WRITE(HSW_ROW_CHICKEN3, @@ -8681,10 +8696,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev_priv); } @@ -8692,8 +8703,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t snpcr; - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaDisableEarlyCull:ivb */ diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 5419cda83ba8..6e3b430fccdc 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -58,6 +58,9 @@ static bool is_edp_psr(struct intel_dp *intel_dp) { + if (!intel_dp_is_edp(intel_dp)) + return false; + return intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED; } @@ -72,37 +75,6 @@ static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) (val == VLV_EDP_PSR_ACTIVE_SF_UPDATE); } -static void intel_psr_write_vsc(struct intel_dp *intel_dp, - const struct edp_vsc_psr *vsc_psr) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; - i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); - uint32_t *data = (uint32_t *) vsc_psr; - unsigned int i; - - /* As per BSPec (Pipe Video Data Island Packet), we need to disable - the video DIP being updated before program video DIP data buffer - registers for DIP being updated. 
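/*
 * The helper being removed below hard-coded the rule its comment
 * spells out: disable the DIP, rewrite the full data buffer (padding
 * the unused dwords so the ECC is recomputed), then re-enable it. The
 * shared hsw_write_infoframe(), with its type widened to also accept
 * DP SDP identifiers, preserves that ordering, which is why PSR can
 * now emit its VSC SDP through the common vfunc:
 */
	intel_dig_port->write_infoframe(&intel_dig_port->base.base,
					crtc_state, DP_SDP_VSC,
					&psr_vsc, sizeof(psr_vsc));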
*/ - I915_WRITE(ctl_reg, 0); - POSTING_READ(ctl_reg); - - for (i = 0; i < sizeof(*vsc_psr); i += 4) { - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), *data); - data++; - } - for (; i < VIDEO_DIP_VSC_DATA_SIZE; i += 4) - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), 0); - - I915_WRITE(ctl_reg, VIDEO_DIP_ENABLE_VSC_HSW); - POSTING_READ(ctl_reg); -} - static void vlv_psr_setup_vsc(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -149,7 +121,8 @@ static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, psr_vsc.sdp_header.HB3 = 0x8; } - intel_psr_write_vsc(intel_dp, &psr_vsc); + intel_dig_port->write_infoframe(&intel_dig_port->base.base, crtc_state, + DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc)); } static void vlv_psr_enable_sink(struct intel_dp *intel_dp) @@ -376,22 +349,25 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) hsw_activate_psr1(intel_dp); } -static bool intel_psr_match_conditions(struct intel_dp *intel_dp) +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dig_port->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; int psr_setup_time; - lockdep_assert_held(&dev_priv->psr.lock); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); + if (!HAS_PSR(dev_priv)) + return; + + if (!is_edp_psr(intel_dp)) + return; - dev_priv->psr.source_ok = false; + if (!i915_modparams.enable_psr) { + DRM_DEBUG_KMS("PSR disable by flag\n"); + return; + } /* * HSW spec explicitly says PSR is tied to port A. @@ -402,66 +378,70 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) */ if (HAS_DDI(dev_priv) && dig_port->port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); - return false; - } - - if (!i915_modparams.enable_psr) { - DRM_DEBUG_KMS("PSR disable by flag\n"); - return false; + return; } if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && !dev_priv->psr.link_standby) { DRM_ERROR("PSR condition failed: Link off requested but not supported on this platform\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && - I915_READ(HSW_STEREO_3D_CTL(intel_crtc->config->cpu_transcoder)) & + I915_READ(HSW_STEREO_3D_CTL(crtc_state->cpu_transcoder)) & S3D_ENABLE) { DRM_DEBUG_KMS("PSR condition failed: Stereo 3D is Enabled\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { DRM_DEBUG_KMS("PSR condition failed: Interlaced is Enabled\n"); - return false; + return; } psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); if (psr_setup_time < 0) { DRM_DEBUG_KMS("PSR condition failed: Invalid PSR setup time (0x%02x)\n", intel_dp->psr_dpcd[1]); - return false; + return; } if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { DRM_DEBUG_KMS("PSR condition failed: PSR setup time (%d us) too long\n", psr_setup_time); - return false; + return; + } + + /* + * FIXME psr2_support is messed up. 
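/*
 * PSR eligibility moves into the atomic check phase: the old
 * match_conditions tests now run in intel_psr_compute_config() and are
 * recorded in the crtc state, which the commit-time hooks merely
 * consume. Schematically:
 */
	/* check phase, from intel_dp_compute_config() */
	intel_psr_compute_config(intel_dp, pipe_config);

	/* commit phase, in intel_psr_enable() */
	if (!crtc_state->has_psr)
		return;
	dev_priv->psr.psr2_support = crtc_state->has_psr2;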
It's both computed + * dynamically during PSR enable, and extracted from sink + * caps during eDP detection. + */ + if (!dev_priv->psr.psr2_support) { + crtc_state->has_psr = true; + return; } /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (dev_priv->psr.psr2_support && - (intel_crtc->config->pipe_src_w > 3200 || - intel_crtc->config->pipe_src_h > 2000)) { - dev_priv->psr.psr2_support = false; - return false; + if (adjusted_mode->crtc_hdisplay > 3200 || + adjusted_mode->crtc_vdisplay > 2000) { + DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); + return; } /* * FIXME:enable psr2 only for y-cordinate psr2 panels * After gtc implementation , remove this restriction. */ - if (!dev_priv->psr.y_cord_support && dev_priv->psr.psr2_support) { + if (!dev_priv->psr.y_cord_support) { DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); - return false; + return; } - dev_priv->psr.source_ok = true; - return true; + crtc_state->has_psr = true; + crtc_state->has_psr2 = true; } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -531,13 +511,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!HAS_PSR(dev_priv)) - return; - - if (!is_edp_psr(intel_dp)) { - DRM_DEBUG_KMS("PSR not supported by this panel\n"); + if (!crtc_state->has_psr) return; - } WARN_ON(dev_priv->drrs.dp); mutex_lock(&dev_priv->psr.lock); @@ -546,8 +521,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - if (!intel_psr_match_conditions(intel_dp)) - goto unlock; + dev_priv->psr.psr2_support = crtc_state->has_psr2; + dev_priv->psr.source_ok = true; dev_priv->psr.busy_frontbuffer_bits = 0; @@ -668,7 +643,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!HAS_PSR(dev_priv)) + if (!old_crtc_state->has_psr) return; mutex_lock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4285f09ff8b8..8da1bde442dd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -484,11 +484,6 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); - if (INTEL_GEN(dev_priv) > 2) { - (void)I915_READ_CTL(engine); - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); - } - return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } @@ -570,6 +565,9 @@ static int init_ring_common(struct intel_engine_cs *engine) intel_engine_init_hangcheck(engine); + if (INTEL_GEN(dev_priv) > 2) + I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); + out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -1246,6 +1244,8 @@ int intel_ring_pin(struct intel_ring *ring, if (IS_ERR(addr)) goto err; + vma->obj->pin_global++; + ring->vaddr = addr; return 0; @@ -1277,6 +1277,7 @@ void intel_ring_unpin(struct intel_ring *ring) i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; + ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); } @@ -1441,6 +1442,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, goto err; ce->state->obj->mm.dirty = true; + ce->state->obj->pin_global++; } /* The kernel context is only used as a placeholder for flushing the @@ -1475,8 +1477,10 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, if (--ce->pin_count) return; - if (ce->state) + 
if (ce->state) { + ce->state->obj->pin_global--; i915_vma_unpin(ce->state); + } i915_gem_context_put(ctx); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 17186f067408..6a42ed618a28 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -241,9 +241,17 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** - * @preempt: are we currently handling a preempting context switch? + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. */ - bool preempt; + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 /** * @port_mask: number of execlist ports - 1 @@ -525,6 +533,27 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { @@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, const unsigned int m = execlists->port_mask; GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 86fc9b529f2d..4fcf80ca91dd 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -230,7 +230,7 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) #endif } -static void +void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -311,7 +311,7 @@ skl_update_plane(struct intel_plane *plane, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 7b938e822fde..25bd162f38d2 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -68,7 +68,7 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) if (HAS_HUC_UCODE(dev_priv)) intel_huc_select_fw(&dev_priv->huc); - if (intel_guc_select_fw(&dev_priv->guc)) + if (intel_guc_fw_select(&dev_priv->guc)) i915_modparams.enable_guc_loading = 0; } @@ -195,7 +195,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_submission; intel_huc_init_hw(&dev_priv->huc); - ret = intel_guc_init_hw(&dev_priv->guc); + intel_guc_init_params(guc); + ret = 
intel_guc_fw_upload(guc); if (ret == 0 || ret != -EAGAIN) break; @@ -221,6 +222,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_interrupts; } + dev_info(dev_priv->drm.dev, "GuC %s (firmware %s [version %u.%u])\n", + i915_modparams.enable_guc_submission ? "submission enabled" : + "loaded", + guc->fw.path, + guc->fw.major_ver_found, guc->fw.minor_ver_found); + return 0; /* @@ -243,12 +250,14 @@ err_submission: err_guc: i915_ggtt_disable_guc(dev_priv); - DRM_ERROR("GuC init failed\n"); if (i915_modparams.enable_guc_loading > 1 || - i915_modparams.enable_guc_submission > 1) + i915_modparams.enable_guc_submission > 1) { + DRM_ERROR("GuC init failed. Firmware loading disabled.\n"); ret = -EIO; - else + } else { + DRM_NOTE("GuC init failed. Firmware loading disabled.\n"); ret = 0; + } if (i915_modparams.enable_guc_submission) { i915_modparams.enable_guc_submission = 0; @@ -256,7 +265,6 @@ err_guc: } i915_modparams.enable_guc_loading = 0; - DRM_NOTE("GuC firmware loading disabled\n"); return ret; } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 766b1cbdfbd7..973888e94cba 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -23,6 +23,7 @@ */ #include <linux/firmware.h> +#include <drm/drm_print.h> #include "intel_uc_fw.h" #include "i915_drv.h" @@ -45,26 +46,33 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, size_t size; int err; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + if (!uc_fw->path) return; uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), intel_uc_fw_status_repr(uc_fw->fetch_status)); err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) + if (err) { + DRM_DEBUG_DRIVER("%s fw request_firmware err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); goto fail; + } - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); + DRM_DEBUG_DRIVER("%s fw size %zu ptr %p\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, fw); /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); + DRM_WARN("%s: Unexpected firmware size (%zu, min %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; goto fail; } @@ -77,7 +85,9 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, sizeof(u32); if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); + DRM_WARN("%s: Mismatched firmware header definition\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -ENOEXEC; goto fail; } @@ -85,9 +95,20 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + /* Header and uCode will be loaded to WOPCM */ + size = uc_fw->header_size + uc_fw->ucode_size; + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_WARN("%s: Firmware is too large to fit in WOPCM\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -E2BIG; + goto fail; + } + /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); + DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", + 
intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw); + err = -ENOEXEC; goto fail; } uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; @@ -96,7 +117,9 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, /* At least, it should have header, uCode and RSA. Size of all three. */ size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); + DRM_WARN("%s: Truncated firmware (%zu, expected %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, size); + err = -ENOEXEC; goto fail; } @@ -108,14 +131,6 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, */ switch (uc_fw->type) { case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } uc_fw->major_ver_found = css->guc.sw_version >> 16; uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; break; @@ -126,17 +141,21 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, break; default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; + MISSING_CASE(uc_fw->type); + break; } + DRM_DEBUG_DRIVER("%s fw version %u.%u (wanted %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping %s firmware version check\n", + DRM_NOTE("%s: Skipping firmware version check\n", intel_uc_fw_type_repr(uc_fw->type)); } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", + DRM_NOTE("%s: Wrong firmware version (%u.%u, required %u.%u)\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); @@ -144,34 +163,115 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, goto fail; } - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); if (IS_ERR(obj)) { err = PTR_ERR(obj); + DRM_DEBUG_DRIVER("%s fw object_create err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); goto fail; } uc_fw->obj = obj; uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; return; fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + DRM_WARN("%s: Failed to fetch firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + DRM_INFO("%s: Firmware can be downloaded from %s\n", + 
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; +} + +/** + * intel_uc_fw_upload - load uC firmware using custom loader + * + * @uc_fw: uC firmware + * @loader: custom uC firmware loader function + * + * Loads uC firmware using custom loader and updates internal flags. + */ +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)) +{ + struct i915_vma *vma; + int err; + + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -EIO; + + uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + /* Pin object with firmware */ + err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); + if (err) { + DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + /* Call custom loader */ + err = xfer(uc_fw, vma); + + /* + * We keep the object pages for reuse during resume. But we can unpin it + * now that DMA has completed, so it doesn't continue to take up space. + */ + i915_vma_unpin(vma); + + if (err) + goto fail; + + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_INFO("%s: Loaded firmware %s (version %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + + return 0; + +fail: + uc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_WARN("%s: Failed to load firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + + return err; } /** @@ -191,3 +291,28 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } + +/** + * intel_uc_fw_dump - dump information about uC firmware + * @uc_fw: uC firmware + * @p: the &drm_printer + * + * Pretty printer for uC firmware. 
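/*
 * Expected caller shape for intel_uc_fw_upload() above: the caller owns
 * the hardware-specific DMA and hands it in as the xfer callback. The
 * callback body here is a placeholder sketch, not the real GuC/HuC
 * transfer implementation.
 */
static int example_xfer(struct intel_uc_fw *uc_fw, struct i915_vma *vma)
{
	/* program the DMA engine using i915_ggtt_offset(vma), then wait */
	return 0;
}

static int example_fw_load(struct intel_uc_fw *uc_fw)
{
	return intel_uc_fw_upload(uc_fw, example_xfer);
}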
+ */ +void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: fetch %s, load %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status), + intel_uc_fw_status_repr(uc_fw->load_status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\theader: offset %u, size %u\n", + uc_fw->header_offset, uc_fw->header_size); + drm_printf(p, "\tuCode: offset %u, size %u\n", + uc_fw->ucode_offset, uc_fw->ucode_size); + drm_printf(p, "\tRSA: offset %u, size %u\n", + uc_fw->rsa_offset, uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index c3e9af4b9bf0..132903669391 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -25,7 +25,12 @@ #ifndef _INTEL_UC_FW_H_ #define _INTEL_UC_FW_H_ +struct drm_printer; struct drm_i915_private; +struct i915_vma; + +/* Home of GuC, HuC and DMC firmwares */ +#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" enum intel_uc_fw_status { INTEL_UC_FIRMWARE_FAIL = -1, @@ -50,6 +55,11 @@ struct intel_uc_fw { enum intel_uc_fw_status fetch_status; enum intel_uc_fw_status load_status; + /* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. + */ u16 major_ver_wanted; u16 minor_ver_wanted; u16 major_ver_found; @@ -102,6 +112,10 @@ void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 983617b5b338..20e3c65c0999 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1403,6 +1403,9 @@ static void i915_stop_engines(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine; enum intel_engine_id id; + if (INTEL_GEN(dev_priv) < 3) + return; + for_each_engine_masked(engine, dev_priv, engine_mask, id) gen3_stop_engine(engine); } @@ -1742,16 +1745,12 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { - reset_func reset; + reset_func reset = intel_get_gpu_reset(dev_priv); int retry; int ret; might_sleep(); - reset = intel_get_gpu_reset(dev_priv); - if (reset == NULL) - return -ENODEV; - /* If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). 
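/*
 * Condensed shape of the retry loop after this refactor (the retry bound
 * shown is illustrative; see the hunk below for the actual change):
 * engines are stopped on every attempt even when no reset backend exists,
 * and a NULL reset function now yields -ENODEV from inside the loop
 * instead of via an early return.
 */
for (retry = 0; retry < 3; retry++) {
	i915_stop_engines(dev_priv, engine_mask);

	ret = -ENODEV;
	if (reset)
		ret = reset(dev_priv, engine_mask);
	if (ret != -ETIMEDOUT)
		break;

	cond_resched();
}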
*/ @@ -1771,7 +1770,9 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) */ i915_stop_engines(dev_priv, engine_mask); - ret = reset(dev_priv, engine_mask); + ret = -ENODEV; + if (reset) + ret = reset(dev_priv, engine_mask); if (ret != -ETIMEDOUT) break; diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 404569c9fdfc..f225c288a121 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -306,6 +306,14 @@ struct bdb_general_features { #define LEGACY_CHILD_DEVICE_CONFIG_SIZE 33 +/* DDC Bus DDI Type 155+ */ +enum vbt_gmbus_ddi { + DDC_BUS_DDI_B = 0x1, + DDC_BUS_DDI_C, + DDC_BUS_DDI_D, + DDC_BUS_DDI_F, +}; + /* * The child device config, aka the display device data structure, provides a * description of a port and its configuration on the platform. diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index c53f8474113a..5cc8101bb2b1 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -609,7 +609,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) bool single = false; LIST_HEAD(objects); IGT_TIMEOUT(end_time); - int err; + int err = -ENODEV; for_each_prime_number_from(page_num, 1, max_pages) { struct drm_i915_gem_object *obj; @@ -1157,7 +1157,7 @@ static int igt_ppgtt_exhaust_huge(void *arg) unsigned int size_mask; unsigned int page_mask; int n, i; - int err; + int err = -ENODEV; /* * Sanity check creating objects with a varying mix of page sizes -- diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index fb0a58fc8348..def5052862ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -417,7 +417,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) if (err) return err; - list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { struct i915_vma *vma; vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 5ea373221f49..f463105ff48d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -24,6 +24,9 @@ #include "../i915_selftest.h" +#include "lib_sw_fence.h" +#include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int populate_ggtt(struct drm_i915_private *i915) @@ -47,7 +50,7 @@ static int populate_ggtt(struct drm_i915_private *i915) if (!list_empty(&i915->mm.unbound_list)) { size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) size++; pr_err("Found %lld objects unbound!\n", size); @@ -74,10 +77,10 @@ static void cleanup_objects(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) i915_gem_object_put(obj); - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) i915_gem_object_put(obj); mutex_unlock(&i915->drm.struct_mutex); @@ -149,8 +152,6 @@ static int igt_overcommit(void *arg) goto cleanup; } - list_move(&obj->global_link, 
&i915->mm.unbound_list); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); @@ -325,6 +326,148 @@ cleanup: return err; } +static int igt_evict_contexts(void *arg) +{ + const u64 PRETEND_GGTT_SIZE = 16ull << 20; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct reserved { + struct drm_mm_node node; + struct reserved *next; + } *reserved = NULL; + struct drm_mm_node hole; + unsigned long count; + int err; + + /* + * The purpose of this test is to verify that we will trigger an + * eviction in the GGTT when constructing a request that requires + * additional space in the GGTT for pinning the context. This space + * is not directly tied to the request so reclaiming it requires + * extra work. + * + * As such this test is only meaningful for full-ppgtt environments + * where the GTT space of the request is separate from the GGTT + * allocation required to build the request. + */ + if (!USES_FULL_PPGTT(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + /* Reserve a block so that we know we have enough to fit a few rq */ + memset(&hole, 0, sizeof(hole)); + err = i915_gem_gtt_insert(&i915->ggtt.base, &hole, + PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT); + if (err) + goto out_locked; + + /* Make the GGTT appear small by filling it with unevictable nodes */ + count = 0; + do { + struct reserved *r; + + r = kcalloc(1, sizeof(*r), GFP_KERNEL); + if (!r) { + err = -ENOMEM; + goto out_locked; + } + + if (i915_gem_gtt_insert(&i915->ggtt.base, &r->node, + 1ul << 20, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT)) { + kfree(r); + break; + } + + r->next = reserved; + reserved = r; + + count++; + } while (1); + drm_mm_remove_node(&hole); + mutex_unlock(&i915->drm.struct_mutex); + pr_info("Filled GGTT with %lu 1MiB nodes\n", count); + + /* Overfill the GGTT with context objects and so try to evict one. 
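/*
 * The holding pattern used below, in miniature: each new request's submit
 * fence is chained to a single on-stack fence, so no request executes
 * (and no context unpins) until onstack_fence_fini() commits the fence.
 * A hypothetical reduced form:
 */
static void example_hold_request(struct drm_i915_gem_request *rq)
{
	struct i915_sw_fence fence;

	onstack_fence_init(&fence);

	/* chain as many requests as needed before committing */
	if (i915_sw_fence_await_sw_fence_gfp(&rq->submit, &fence,
					     GFP_KERNEL) >= 0)
		i915_add_request(rq);

	onstack_fence_fini(&fence);	/* all held requests may now run */
}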
*/ + for_each_engine(engine, i915, id) { + struct i915_sw_fence fence; + struct drm_file *file; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + count = 0; + mutex_lock(&i915->drm.struct_mutex); + onstack_fence_init(&fence); + do { + struct drm_i915_gem_request *rq; + struct i915_gem_context *ctx; + + ctx = live_context(i915, file); + if (!ctx) + break; + + /* We will need some GGTT space for the rq's context */ + igt_evict_ctl.fail_if_busy = true; + rq = i915_gem_request_alloc(engine, ctx); + igt_evict_ctl.fail_if_busy = false; + + if (IS_ERR(rq)) { + /* When full, fail_if_busy will trigger EBUSY */ + if (PTR_ERR(rq) != -EBUSY) { + pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", + ctx->hw_id, engine->name, + (int)PTR_ERR(rq)); + err = PTR_ERR(rq); + } + break; + } + + /* Keep every request/ctx pinned until we are full */ + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + &fence, + GFP_KERNEL); + if (err < 0) + break; + + i915_add_request(rq); + count++; + err = 0; + } while(1); + mutex_unlock(&i915->drm.struct_mutex); + + onstack_fence_fini(&fence); + pr_info("Submitted %lu contexts/requests on %s\n", + count, engine->name); + + mock_file_free(i915, file); + if (err) + break; + } + + mutex_lock(&i915->drm.struct_mutex); +out_locked: + while (reserved) { + struct reserved *next = reserved->next; + + drm_mm_remove_node(&reserved->node); + kfree(reserved); + + reserved = next; + } + if (drm_mm_node_allocated(&hole)) + drm_mm_remove_node(&hole); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + int i915_gem_evict_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -348,3 +491,12 @@ int i915_gem_evict_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_evict_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_contexts), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 64acd7eccc5c..54a73534b37e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,6 +15,7 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index 19d145d6bf52..ea01d0fe3ace 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -24,6 +24,7 @@ #include <linux/completion.h> #include <linux/delay.h> +#include <linux/prime_numbers.h> #include "../i915_selftest.h" @@ -565,6 +566,46 @@ err_in: return ret; } +static int test_timer(void *arg) +{ + unsigned long target, delay; + struct timed_fence tf; + + timed_fence_init(&tf, target = jiffies); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence with immediate expiration not signaled\n"); + goto err; + } + timed_fence_fini(&tf); + + for_each_prime_number(delay, i915_selftest.timeout_jiffies/2) { + timed_fence_init(&tf, target = jiffies + delay); + if (i915_sw_fence_done(&tf.fence)) { + 
pr_err("Fence with future expiration (%lu jiffies) already signaled\n", delay); + goto err; + } + + i915_sw_fence_wait(&tf.fence); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence not signaled after wait\n"); + goto err; + } + if (time_before(jiffies, target)) { + pr_err("Fence signaled too early, target=%lu, now=%lu\n", + target, jiffies); + goto err; + } + + timed_fence_fini(&tf); + } + + return 0; + +err: + timed_fence_fini(&tf); + return -EINVAL; +} + int i915_sw_fence_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -576,6 +617,7 @@ int i915_sw_fence_mock_selftests(void) SUBTEST(test_C_AB), SUBTEST(test_chain), SUBTEST(test_ipc), + SUBTEST(test_timer), }; return i915_subtests(tests, NULL); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c new file mode 100644 index 000000000000..3790fdf44a1a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "lib_sw_fence.h" + +/* Small library of different fence types useful for writing tests */ + +static int __i915_sw_fence_call +nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key) +{ + debug_fence_init_onstack(fence); + + __init_waitqueue_head(&fence->wait, name, key); + atomic_set(&fence->pending, 1); + fence->flags = (unsigned long)nop_fence_notify; +} + +void onstack_fence_fini(struct i915_sw_fence *fence) +{ + i915_sw_fence_commit(fence); + i915_sw_fence_fini(fence); +} + +static void timed_fence_wake(unsigned long data) +{ + struct timed_fence *tf = (struct timed_fence *)data; + + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_init(struct timed_fence *tf, unsigned long expires) +{ + onstack_fence_init(&tf->fence); + + setup_timer_on_stack(&tf->timer, timed_fence_wake, (unsigned long)tf); + + if (time_after(expires, jiffies)) + mod_timer(&tf->timer, expires); + else + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_fini(struct timed_fence *tf) +{ + if (del_timer_sync(&tf->timer)) + i915_sw_fence_commit(&tf->fence); + + destroy_timer_on_stack(&tf->timer); + i915_sw_fence_fini(&tf->fence); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h new file mode 100644 index 000000000000..474aafb92ae1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -0,0 +1,42 @@ +/* + * lib_sw_fence.h - library routines for testing N:M synchronisation points + * + * Copyright (C) 2017 Intel Corporation + * + * This file is released under the GPLv2. + * + */ + +#ifndef _LIB_SW_FENCE_H_ +#define _LIB_SW_FENCE_H_ + +#include <linux/timer.h> + +#include "../i915_sw_fence.h" + +#ifdef CONFIG_LOCKDEP +#define onstack_fence_init(fence) \ +do { \ + static struct lock_class_key __key; \ + \ + __onstack_fence_init((fence), #fence, &__key); \ +} while (0) +#else +#define onstack_fence_init(fence) \ + __onstack_fence_init((fence), NULL, NULL) +#endif + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key); +void onstack_fence_fini(struct i915_sw_fence *fence); + +struct timed_fence { + struct i915_sw_fence fence; + struct timer_list timer; +}; + +void timed_fence_init(struct timed_fence *tf, unsigned long expires); +void timed_fence_fini(struct timed_fence *tf); + +#endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 098ce643ad07..bbf80d42e793 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -73,11 +73,7 @@ err_put: void mock_context_close(struct i915_gem_context *ctx) { - i915_gem_context_set_closed(ctx); - - i915_ppgtt_close(&ctx->ppgtt->base); - - i915_gem_context_put(ctx); + context_close(ctx); } void mock_init_contexts(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index fc0fd7498689..331c2b09869e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -32,9 +32,9 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } -static void hw_delay_complete(unsigned long data) +static void hw_delay_complete(struct timer_list *t) { - struct mock_engine *engine = 
(typeof(engine))data; + struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; spin_lock(&engine->hw_lock); @@ -161,9 +161,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* fake hw queue */ spin_lock_init(&engine->hw_lock); - setup_timer(&engine->hw_delay, - hw_delay_complete, - (unsigned long)engine); + timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); return &engine->base; diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 33008fa1be9b..d0b26dd80076 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -8,6 +8,7 @@ msm-y := \ adreno/a4xx_gpu.o \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ + adreno/a5xx_preempt.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ @@ -57,7 +58,8 @@ msm-y := \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ - msm_ringbuffer.o + msm_ringbuffer.o \ + msm_submitqueue.o msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7791313405b5..4baef2738178 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -444,9 +444,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, .recover = a3xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -492,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 58341ef6f15b..8199a4b9f2fa 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then 
wait for GPU to finish: */ @@ -532,9 +532,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a4xx_pm_suspend, .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -574,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 17c59d839e6f..a1f4eeeb73e2 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -26,8 +26,9 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 -static int zap_shader_load_mdt(struct device *dev, const char *fwname) +static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) { + struct device *dev = &gpu->pdev->dev; const struct firmware *fw; struct device_node *np; struct resource r; @@ -55,10 +56,10 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) mem_size = resource_size(&r); /* Request the MDT file for the firmware */ - ret = request_firmware(&fw, fwname, dev); - if (ret) { + fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); + if (IS_ERR(fw)) { DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); - return ret; + return PTR_ERR(fw); } /* Figure out how much memory we need */ @@ -75,9 +76,26 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) goto out; } - /* Load the rest of the MDT */ - ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys, - mem_size); + /* + * Load the rest of the MDT + * + * Note that we could be dealing with two different paths, since + * with upstream linux-firmware it would be in a qcom/ subdir.. + * adreno_request_fw() handles this, but qcom_mdt_load() does + * not. 
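/*
 * Equivalent of the "qcom/" prefixing done in the zap-shader hunk below,
 * pulled out as a self-contained helper (illustrative only; the patch
 * itself open-codes it with an on-stack buffer):
 */
static char *example_prefixed_fw_name(const char *fwname)
{
	return kasprintf(GFP_KERNEL, "qcom/%s", fwname);	/* kfree() later */
}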
But since we've already gotten thru adreno_request_fw() + * we know which of the two cases it is: + */ + if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { + ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } else { + char newname[strlen("qcom/") + strlen(fwname) + 1]; + + sprintf(newname, "qcom/%s", fwname); + + ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } if (ret) goto out; @@ -95,14 +113,65 @@ out: return ret; } +static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + /* Update HW if this is the current ring and we are not in preempt */ + if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x02); + + /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: @@ -120,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } } + /* + * Write the render mode to NULL (0) to indicate to the CP that the IBs + * are done rendering - otherwise a lucky preemption would start + * replaying from the last checkpoint + */ + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Turn off IB level preemptions */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); + /* + * Execute a CACHE_FLUSH_TS event. 
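/*
 * rbmemptr() used below resolves a GPU address inside the ring's shared
 * memptrs block; a sketch of the layout it assumes (field set abridged,
 * names follow msm_ringbuffer.h but are shown here only for illustration):
 */
struct msm_rbmemptrs {
	volatile uint32_t rptr;
	volatile uint32_t fence;
};

#define rbmemptr(ring, member) \
	((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member))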
This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + /* + * If dword[2:1] are non zero, they specify an address for the CP to + * write the value of dword[3] to on preemption complete. Write 0 to + * skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* Set bit 0 to trigger an interrupt on preempt complete */ + OUT_RING(ring, 0x01); + + a5xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a5xx_preempt_trigger(gpu); } static const struct { @@ -245,7 +352,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -276,11 +383,54 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; +} + +static int a5xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings == 1) + return 0; + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x00); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x01); - return a5xx_idle(gpu) ? 0 : -EINVAL; + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x01); + OUT_RING(ring, 0x01); + + gpu->funcs->flush(gpu, ring); + + return a5xx_idle(gpu, ring) ? 
0 : -EINVAL; } + static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, const struct firmware *fw, u64 *iova) { @@ -381,7 +531,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) return -ENODEV; } - ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw); + ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw); loaded = !ret; @@ -396,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ + A5XX_RBBM_INT_0_MASK_CP_SW | \ A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) @@ -536,13 +687,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu) REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); - /* Load the GPMU firmware before starting the HW init */ - a5xx_gpmu_ucode_init(gpu); - ret = adreno_hw_init(gpu); if (ret) return ret; + a5xx_preempt_hw_init(gpu); + + a5xx_gpmu_ucode_init(gpu); + ret = a5xx_ucode_init(gpu); if (ret) return ret; @@ -565,11 +717,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -582,11 +734,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -595,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); } + /* Last step - yield the ringbuffer */ + a5xx_preempt_start(gpu); + return 0; } @@ -625,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + a5xx_preempt_fini(gpu); + if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); @@ -660,18 +817,27 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (ring != a5xx_gpu->cur_ring) { + WARN(1, "Tried to idle a non-current ringbuffer\n"); + return false; + } + /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -802,9 +968,10 @@ static void a5xx_fault_detect_irq(struct 
msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); - dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - gpu->funcs->last_fence(gpu), + dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), @@ -854,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu) if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) a5xx_gpmu_err_irq(gpu); - if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { + a5xx_preempt_trigger(gpu); msm_gpu_retire(gpu); + } + + if (status & A5XX_RBBM_INT_0_MASK_CP_SW) + a5xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -985,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) } #endif +static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + return a5xx_gpu->cur_ring; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -992,9 +1172,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a5xx_pm_suspend, .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, - .last_fence = adreno_last_fence, .submit = a5xx_submit, - .flush = adreno_flush, + .flush = a5xx_flush, + .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1030,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); @@ -1039,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a5xx_preempt_init(gpu); + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e94451685bf8..6fb8c2f9b9e4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -35,10 +35,100 @@ struct a5xx_gpu { uint32_t gpmu_dwords; uint32_t lm_leakage; + + struct msm_ringbuffer *cur_ring; + struct msm_ringbuffer *next_ring; + + struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; + struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; + uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; + + atomic_t preempt_state; + struct timer_list preempt_timer; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) +/* + * In order to do lockless preemption we use a simple state machine to progress + * through the process. + * + * PREEMPT_NONE - no preemption in progress. Next state START. + * PREEMPT_START - The trigger is evaulating if preemption is possible. 
Next + * states: TRIGGERED, NONE + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next + * state: NONE. + * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next + * states: FAULTED, PENDING + * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger + * recovery. Next state: N/A + * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is + * checking the success of the operation. Next state: FAULTED, NONE. + */ + +enum preempt_state { + PREEMPT_NONE = 0, + PREEMPT_START, + PREEMPT_ABORT, + PREEMPT_TRIGGERED, + PREEMPT_FAULTED, + PREEMPT_PENDING, +}; + +/* + * struct a5xx_preempt_record is a shared buffer between the microcode and the + * CPU to store the state for preemption. The record itself is much larger + * (64k) but most of that is used by the CP for storage. + * + * There is a preemption record assigned per ringbuffer. When the CPU triggers a + * preemption, it fills out the record with the useful information (wptr, ring + * base, etc) and the microcode uses that information to set up the CP following + * the preemption. When a ring is switched out, the CP will save the ringbuffer + * state back to the record. In this way, once the records are properly set up + * the CPU can quickly switch back and forth between ringbuffers by only + * updating a few registers (often only the wptr). + * + * These are the CPU aware registers in the record: + * @magic: Must always be 0x27C4BAFC + * @info: Type of the record - written 0 by the CPU, updated by the CP + * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by + * the CP + * @cntl: Value of RB_CNTL written by CPU, save/restored by CP + * @rptr: Value of RB_RPTR written by CPU, save/restored by CP + * @wptr: Value of RB_WPTR written by CPU, save/restored by CP + * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP + * @rbase: Value of RB_BASE written by CPU, save/restored by CP + * @counter: GPU address of the storage area for the performance counters + */ +struct a5xx_preempt_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +/* Magic identifier for the preemption record */ +#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL + +/* + * Even though the structure above is only a few bytes, we need a full 64k to + * store the entire preemption record from the CP + */ +#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024) + +/* + * The preemption counter block is a storage area for the value of the + * preemption counters that are saved immediately before context switch. We + * append it on to the end of the allocation for the preemption record. 
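/*
 * Resulting layout of the single BO allocated per ring (see
 * preempt_init_ring() below); sizes are the macros defined here, the
 * offset is implied by ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE:
 *
 *	iova + 0   : CP save/restore record   (A5XX_PREEMPT_RECORD_SIZE, 64K)
 *	iova + 64K : performance counter area (A5XX_PREEMPT_COUNTER_SIZE)
 *
 * A compile-time guard one could add (hypothetical, not in the patch):
 */
_Static_assert(sizeof(struct a5xx_preempt_record) <= A5XX_PREEMPT_RECORD_SIZE,
	       "CPU-visible record must fit in the CP record area");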
+ */ +#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4) + + int a5xx_power_init(struct msm_gpu *gpu); void a5xx_gpmu_ucode_init(struct msm_gpu *gpu); @@ -55,7 +145,22 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); +void a5xx_preempt_init(struct msm_gpu *gpu); +void a5xx_preempt_hw_init(struct msm_gpu *gpu); +void a5xx_preempt_trigger(struct msm_gpu *gpu); +void a5xx_preempt_irq(struct msm_gpu *gpu); +void a5xx_preempt_fini(struct msm_gpu *gpu); + +/* Return true if we are in a preempt state */ +static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) +{ + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_ABORT); +} + #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 04aab1dcae2b..e5700bbf09dd 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; @@ -264,7 +264,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) return; /* Get the firmware */ - if (request_firmware(&fw, adreno_gpu->info->gpmufw, drm->dev)) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->gpmufw); + if (IS_ERR(fw)) { DRM_ERROR("%s: Could not get GPMU firmware. GPMU will not be active\n", gpu->name); return; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c new file mode 100644 index 000000000000..40f4840ef98e --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -0,0 +1,305 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "msm_gem.h" +#include "a5xx_gpu.h" + +/* + * Try to transition the preemption state from old to new. Return + * true on success or false if the original state wasn't 'old' + */ +static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu, + enum preempt_state old, enum preempt_state new) +{ + enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state, + old, new); + + return (cur == old); +} + +/* + * Force the preemption state to the specified state. 
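/*
 * The legal transitions, collected into one table as the trigger, irq and
 * timer paths below actually perform them (informational sketch only; the
 * driver enforces them implicitly via try_preempt_state() and the forced
 * set_preempt_state()):
 */
static const unsigned int preempt_legal_next[] = {
	[PREEMPT_NONE]      = BIT(PREEMPT_START),
	[PREEMPT_START]     = BIT(PREEMPT_TRIGGERED) | BIT(PREEMPT_ABORT),
	[PREEMPT_ABORT]     = BIT(PREEMPT_NONE),
	[PREEMPT_TRIGGERED] = BIT(PREEMPT_PENDING) | BIT(PREEMPT_FAULTED),
	[PREEMPT_PENDING]   = BIT(PREEMPT_NONE) | BIT(PREEMPT_FAULTED),
	/* PREEMPT_FAULTED has no successor; GPU recovery takes over */
};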
This is used in cases + * where the current state is known and won't change + */ +static inline void set_preempt_state(struct a5xx_gpu *gpu, + enum preempt_state new) +{ + /* + * preempt_state may be read by other cores trying to trigger a + * preemption or in the interrupt handler so barriers are needed + * before... + */ + smp_mb__before_atomic(); + atomic_set(&gpu->preempt_state, new); + /* ... and after*/ + smp_mb__after_atomic(); +} + +/* Write the most recent wptr for the given ring into the hardware */ +static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + unsigned long flags; + uint32_t wptr; + + if (!ring) + return; + + spin_lock_irqsave(&ring->lock, flags); + wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + +/* Return the highest priority ringbuffer with something in it */ +static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) +{ + unsigned long flags; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + bool empty; + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->lock, flags); + empty = (get_wptr(ring) == ring->memptrs->rptr); + spin_unlock_irqrestore(&ring->lock, flags); + + if (!empty) + return ring; + } + + return NULL; +} + +static void a5xx_preempt_timer(unsigned long data) +{ + struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data; + struct msm_gpu *gpu = &a5xx_gpu->base.base; + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED)) + return; + + dev_err(dev->dev, "%s: preemption timed out\n", gpu->name); + queue_work(priv->wq, &gpu->recover_work); +} + +/* Try to trigger a preemption switch */ +void a5xx_preempt_trigger(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + unsigned long flags; + struct msm_ringbuffer *ring; + + if (gpu->nr_rings == 1) + return; + + /* + * Try to start preemption by moving from NONE to START. If + * unsuccessful, a preemption is already in flight + */ + if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START)) + return; + + /* Get the next ring to preempt to */ + ring = get_next_ring(gpu); + + /* + * If no ring is populated or the highest priority ring is the current + * one do nothing except to update the wptr to the latest and greatest + */ + if (!ring || (a5xx_gpu->cur_ring == ring)) { + /* + * Its possible that while a preemption request is in progress + * from an irq context, a user context trying to submit might + * fail to update the write pointer, because it determines + * that the preempt state is not PREEMPT_NONE. + * + * Close the race by introducing an intermediate + * state PREEMPT_ABORT to let the submit path + * know that the ringbuffer is not going to change + * and can safely update the write pointer. 
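/*
 * Submit-side counterpart of the window described above: a5xx_flush()
 * earlier in this patch only pokes the wptr register when no switch is in
 * flight, and a5xx_in_preempt() deliberately reports false for both
 * PREEMPT_NONE and PREEMPT_ABORT, so an aborted trigger never stalls a
 * submit. Condensed from a5xx_flush():
 */
if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
	gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);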
+ */ + + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); + update_wptr(gpu, a5xx_gpu->cur_ring); + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + return; + } + + /* Make sure the wptr doesn't update while we're in motion */ + spin_lock_irqsave(&ring->lock, flags); + a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + /* Set the address of the incoming preemption record */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + a5xx_gpu->preempt_iova[ring->id]); + + a5xx_gpu->next_ring = ring; + + /* Start a timer to catch a stuck preemption */ + mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); + + /* Set the preemption state to triggered */ + set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED); + + /* Make sure everything is written before hitting the button */ + wmb(); + + /* And actually start the preemption */ + gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1); +} + +void a5xx_preempt_irq(struct msm_gpu *gpu) +{ + uint32_t status; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING)) + return; + + /* Delete the preemption watchdog timer */ + del_timer(&a5xx_gpu->preempt_timer); + + /* + * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before + * firing the interrupt, but there is a non-zero chance of a hardware + * condition or a software race that could set it again before we have a + * chance to finish. If that happens, log and go for recovery + */ + status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL); + if (unlikely(status)) { + set_preempt_state(a5xx_gpu, PREEMPT_FAULTED); + dev_err(dev->dev, "%s: Preemption failed to complete\n", + gpu->name); + queue_work(priv->wq, &gpu->recover_work); + return; + } + + a5xx_gpu->cur_ring = a5xx_gpu->next_ring; + a5xx_gpu->next_ring = NULL; + + update_wptr(gpu, a5xx_gpu->cur_ring); + + set_preempt_state(a5xx_gpu, PREEMPT_NONE); +} + +void a5xx_preempt_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + a5xx_gpu->preempt[i]->wptr = 0; + a5xx_gpu->preempt[i]->rptr = 0; + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; + } + + /* Write a 0 to signal that we aren't switching pagetables */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); + + /* Reset the preemption state */ + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + + /* Always come up on rb 0 */ + a5xx_gpu->cur_ring = gpu->rb[0]; +} + +static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, + struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = &a5xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a5xx_preempt_record *ptr; + struct drm_gem_object *bo = NULL; + u64 iova = 0; + + ptr = msm_gem_kernel_new(gpu->dev, + A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE, + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); + + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + a5xx_gpu->preempt_bo[ring->id] = bo; + a5xx_gpu->preempt_iova[ring->id] = iova; + a5xx_gpu->preempt[ring->id] = ptr; + + /* Set up the defaults on the preemption record */ + + ptr->magic = A5XX_PREEMPT_RECORD_MAGIC; + ptr->info = 0; + ptr->data = 0; + ptr->cntl =
MSM_GPU_RB_CNTL_DEFAULT; + ptr->rptr_addr = rbmemptr(ring, rptr); + ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE; + + return 0; +} + +void a5xx_preempt_fini(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + if (!a5xx_gpu->preempt_bo[i]) + continue; + + msm_gem_put_vaddr(a5xx_gpu->preempt_bo[i]); + + if (a5xx_gpu->preempt_iova[i]) + msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace); + + drm_gem_object_unreference(a5xx_gpu->preempt_bo[i]); + a5xx_gpu->preempt_bo[i] = NULL; + } +} + +void a5xx_preempt_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + /* No preemption if we only have one ring */ + if (gpu->nr_rings <= 1) + return; + + for (i = 0; i < gpu->nr_rings; i++) { + if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) { + /* + * On any failure our adventure is over. Clean up and + * set nr_rings to 1 to force preemption off + */ + a5xx_preempt_fini(gpu); + gpu->nr_rings = 1; + + return; + } + } + + setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, + (unsigned long) a5xx_gpu); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c75c4df4bc39..05022ea2a007 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -125,51 +125,24 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; - struct adreno_platform_config *config; - struct adreno_rev rev; - const struct adreno_info *info; - struct msm_gpu *gpu = NULL; + struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev); + int ret; - if (!pdev) { + if (!gpu) { dev_err(dev->dev, "no adreno device\n"); return NULL; } - config = pdev->dev.platform_data; - rev = config->rev; - info = adreno_info(config->rev); - - if (!info) { - dev_warn(dev->dev, "Unknown GPU revision: %u.%u.%u.%u\n", - rev.core, rev.major, rev.minor, rev.patchid); + pm_runtime_get_sync(&pdev->dev); + mutex_lock(&dev->struct_mutex); + ret = msm_gpu_hw_init(gpu); + mutex_unlock(&dev->struct_mutex); + pm_runtime_put_sync(&pdev->dev); + if (ret) { + dev_err(dev->dev, "gpu hw init failed: %d\n", ret); return NULL; } - DBG("Found GPU: %u.%u.%u.%u", rev.core, rev.major, - rev.minor, rev.patchid); - - gpu = info->init(dev); - if (IS_ERR(gpu)) { - dev_warn(dev->dev, "failed to load adreno gpu\n"); - gpu = NULL; - /* not fatal */ - } - - if (gpu) { - int ret; - - pm_runtime_get_sync(&pdev->dev); - mutex_lock(&dev->struct_mutex); - ret = msm_gpu_hw_init(gpu); - mutex_unlock(&dev->struct_mutex); - pm_runtime_put_sync(&pdev->dev); - if (ret) { - dev_err(dev->dev, "gpu hw init failed: %d\n", ret); - gpu->funcs->destroy(gpu); - gpu = NULL; - } - } - return gpu; } @@ -282,6 +255,9 @@ static int adreno_get_pwrlevels(struct device *dev, static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; + const struct adreno_info *info; + struct drm_device *drm = dev_get_drvdata(master); + struct msm_gpu *gpu; u32 val; int ret; @@ -302,13 +278,39 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return ret; dev->platform_data = &config; - set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); + set_gpu_pdev(drm, to_platform_device(dev)); + + info 
= adreno_info(config.rev); + + if (!info) { + dev_warn(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n", + config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + return -ENXIO; + } + + DBG("Found GPU: %u.%u.%u.%u", config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + + gpu = info->init(drm); + if (IS_ERR(gpu)) { + dev_warn(drm->dev, "failed to load adreno gpu\n"); + return PTR_ERR(gpu); + } + + dev_set_drvdata(dev, gpu); + return 0; } static void adreno_unbind(struct device *dev, struct device *master, void *data) { + struct msm_gpu *gpu = dev_get_drvdata(dev); + + gpu->funcs->pm_suspend(gpu); + gpu->funcs->destroy(gpu); + set_gpu_pdev(dev_get_drvdata(master), NULL); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c8b4ac254bb5..e2ffecce59a3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,8 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K -#define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) { @@ -58,72 +56,181 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; + case MSM_PARAM_NR_RINGS: + *value = gpu->nr_rings; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; } } +const struct firmware * +adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) +{ + struct drm_device *drm = adreno_gpu->base.dev; + const struct firmware *fw = NULL; + char newname[strlen("qcom/") + strlen(fwname) + 1]; + int ret; + + sprintf(newname, "qcom/%s", fwname); + + /* + * Try first to load from qcom/$fwfile using a direct load (to avoid + * a potential timeout waiting for usermode helper) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_NEW)) { + + ret = request_firmware_direct(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from new location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_NEW; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + /* + * Then try the legacy location without qcom/ prefix + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { + + ret = request_firmware_direct(&fw, fwname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from legacy location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_LEGACY; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + fwname, ret); + return ERR_PTR(ret); + } + } + + /* + * Finally fall back to request_firmware() for cases where the + * usermode helper is needed (I think mainly android) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { + + ret = request_firmware(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s with helper\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_HELPER; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + dev_err(drm->dev, "failed to load %s\n", fwname); + return ERR_PTR(-ENOENT); +} + +static int adreno_load_fw(struct adreno_gpu *adreno_gpu) +{ + const struct firmware *fw; + + if (adreno_gpu->pm4) + return 0; 
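The adreno_request_fw() helper above implements a three-tier search: the new qcom/ path with a direct load, then the legacy bare path, and finally the usermode helper; fwloc records whichever tier succeeded, so every later file must come from the same place. A rough userspace analogue of that probe-once-then-pin policy (paths, names and the fopen() framing are illustrative assumptions, not the kernel API):

#include <stdio.h>

enum fwloc { LOC_UNKNOWN, LOC_NEW, LOC_LEGACY };
static enum fwloc fwloc = LOC_UNKNOWN;

/* Try /lib/firmware/qcom/<name> first, then /lib/firmware/<name>; once a
 * location works, all later loads are pinned to it, like adreno_gpu->fwloc */
static FILE *load_fw(const char *name)
{
        char path[256];
        FILE *f;

        if (fwloc == LOC_UNKNOWN || fwloc == LOC_NEW) {
                snprintf(path, sizeof(path), "/lib/firmware/qcom/%s", name);
                f = fopen(path, "rb");
                if (f) {
                        fwloc = LOC_NEW;
                        return f;
                }
                if (fwloc == LOC_NEW) /* pinned location failed: hard error */
                        return NULL;
        }

        if (fwloc == LOC_UNKNOWN || fwloc == LOC_LEGACY) {
                snprintf(path, sizeof(path), "/lib/firmware/%s", name);
                f = fopen(path, "rb");
                if (f) {
                        fwloc = LOC_LEGACY;
                        return f;
                }
        }
        return NULL; /* the kernel would fall back to the helper here */
}

int main(void)
{
        FILE *f = load_fw("a530_pm4.fw");
        if (f)
                fclose(f);
        return 0;
}

Once a tier is latched, a missing file becomes a hard error instead of silently falling through, which keeps a mixed install from loading PM4 and PFP from different places.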
+ + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + adreno_gpu->pm4 = fw; + + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); + if (IS_ERR(fw)) { + release_firmware(adreno_gpu->pm4); + adreno_gpu->pm4 = NULL; + return PTR_ERR(fw); + } + adreno_gpu->pfp = fw; + + return 0; +} + int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int ret, i; DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); + ret = adreno_load_fw(adreno_gpu); + if (ret) return ret; - } - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (!ring) + continue; + + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + ring->next = ring->start; - /* reset completed fence seqno: */ - adreno_gpu->memptrs->fence = gpu->fctx->completed_fence; - adreno_gpu->memptrs->rptr = 0; + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } - /* Setup REG_CP_RB_CNTL: */ + /* + * Set up REG_CP_RB_CNTL. The same value is used across targets (with + * the exception of A430, which disables the RPTR shadow) - the calculation + * for the ringbuffer size and block size is moved to msm_gpu.h for the + * pre-processor to deal with, and the A430 variant is ORed in here + */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + MSM_GPU_RB_CNTL_DEFAULT | + (adreno_is_a430(adreno_gpu) ?
AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, REG_ADRENO_CP_RB_RPTR_ADDR_HI, - rbmemptr(adreno_gpu, rptr)); + rbmemptr(gpu->rb[0], rptr)); } return 0; } -static uint32_t get_wptr(struct msm_ringbuffer *ring) -{ - return ring->cur - ring->start; -} - /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { if (adreno_is_a430(adreno_gpu)) - return adreno_gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return adreno_gpu->memptrs->rptr; + return ring->memptrs->rptr; } -uint32_t adreno_last_fence(struct msm_gpu *gpu) +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - return adreno_gpu->memptrs->fence; + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -149,7 +256,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -164,7 +271,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_BUF: OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); - OUT_RING(ring, submit->cmd[i].iova); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, submit->cmd[i].size); OUT_PKT2(ring); break; @@ -172,7 +279,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -188,8 +295,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(adreno_gpu, fence)); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); @@ -215,20 +322,23 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + /* * Mask wptr value that we calculate to fit in the HW range. 
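To make that arithmetic concrete: the wptr the driver programs is a dword offset wrapped into a power-of-two ring, and ring_freewords() reserves one slot so that rptr == wptr always means empty rather than full. A self-checking sketch with an assumed 32 KiB ring (the driver takes the real size from MSM_GPU_RINGBUFFER_SZ):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_DWORDS (32768 / 4) /* assumed 32 KiB ring, counted in dwords */

/* Wrap a raw dword offset into the ring; mirrors get_wptr() */
static uint32_t wptr_of(uint32_t cur_off)
{
        return cur_off % RING_DWORDS;
}

/* Free dwords between wptr and rptr; mirrors ring_freewords(). The "- 1"
 * keeps one slot unused so a full ring never looks identical to an empty
 * one. */
static uint32_t freewords(uint32_t rptr, uint32_t wptr)
{
        return (rptr + (RING_DWORDS - 1) - wptr) % RING_DWORDS;
}

int main(void)
{
        assert(freewords(0, 0) == RING_DWORDS - 1); /* empty */
        assert(freewords(10, 9) == 0);              /* full */
        /* a command that fits exactly leaves cur == end; wptr wraps to 0 */
        assert(wptr_of(RING_DWORDS) == 0);
        printf("ring arithmetic ok\n");
        return 0;
}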
This is * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -236,17 +346,19 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? */ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", + gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); + return false; } @@ -261,10 +373,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -290,16 +408,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -322,28 +447,31 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + /* Use ring->next to calculate free size */ + uint32_t wptr = ring->next - ring->start; + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for 
ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringbuffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; struct msm_gpu *gpu = &adreno_gpu->base; - int ret; adreno_gpu->funcs = funcs; adreno_gpu->info = adreno_info(config->rev); @@ -366,59 +494,20 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, + return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, adreno_gpu->info->name, &adreno_gpu_config); - if (ret) - return ret; - - ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n", - adreno_gpu->info->pm4fw, ret); - return ret; - } - - ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PFP firmware: %d\n", - adreno_gpu->info->pfpfw, ret); - return ret; - } - - adreno_gpu->memptrs = msm_gem_kernel_new(drm, - sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED, gpu->aspace, - &adreno_gpu->memptrs_bo, &adreno_gpu->memptrs_iova); - - if (IS_ERR(adreno_gpu->memptrs)) { - ret = PTR_ERR(adreno_gpu->memptrs); - adreno_gpu->memptrs = NULL; - dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); - } - - return ret; } void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - struct msm_gpu *gpu = &adreno_gpu->base; - - if (adreno_gpu->memptrs_bo) { - if (adreno_gpu->memptrs) - msm_gem_put_vaddr(adreno_gpu->memptrs_bo); - - if (adreno_gpu->memptrs_iova) - msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace); - - drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); - } release_firmware(adreno_gpu->pm4); release_firmware(adreno_gpu->pfp); - msm_gpu_cleanup(gpu); + msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 4d9165f29f43..28e3de6e5f94 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -2,7 +2,7 @@ * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> * - * Copyright (c) 2014 The Linux Foundation. All rights reserved. + * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved.
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by @@ -82,14 +82,6 @@ struct adreno_info { const struct adreno_info *adreno_info(struct adreno_rev rev); -#define rbmemptr(adreno_gpu, member) \ - ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) - -struct adreno_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct adreno_gpu { struct msm_gpu base; struct adreno_rev rev; @@ -101,16 +93,30 @@ struct adreno_gpu { /* interesting register offsets to dump: */ const unsigned int *registers; + /* + * Are we loading fw from legacy path? Prior to addition + * of gpu firmware to linux-firmware, the fw files were + * placed in toplevel firmware directory, following qcom's + * android kernel. But linux-firmware preferred they be + * placed in a 'qcom' subdirectory. + * + * For backwards compatibility, we try first to load from + * the new path, using request_firmware_direct() to avoid + * any potential timeout waiting for usermode helper, then + * fall back to the old path (with direct load). And + * finally fall back to request_firmware() with the new + * path to allow the usermode helper. + */ + enum { + FW_LOCATION_UNKNOWN = 0, + FW_LOCATION_NEW, /* /lib/firmware/qcom/$fwfile */ + FW_LOCATION_LEGACY, /* /lib/firmware/$fwfile */ + FW_LOCATION_HELPER, + } fwloc; + /* firmware: */ const struct firmware *pm4, *pfp; - /* ringbuffer rptr/wptr: */ - // TODO should this be in msm_ringbuffer? I think it would be - // different for z180.. - struct adreno_rbmemptrs *memptrs; - struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific @@ -196,22 +202,25 @@ static inline int adreno_is_a530(struct adreno_gpu *gpu) } int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); +const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu, + const char *fwname); int adreno_hw_init(struct msm_gpu *gpu); -uint32_t adreno_last_fence(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -220,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -228,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static 
inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -257,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } @@ -323,6 +332,11 @@ static inline void adreno_gpu_write64(struct adreno_gpu *gpu, adreno_gpu_write(gpu, hi, upper_32_bits(data)); } +static inline uint32_t get_wptr(struct msm_ringbuffer *ring) +{ + return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); +} + /* * Given a register and a count, return a value to program into * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index a5d75c9b3a73..65c1dfbbe019 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -14,7 +14,7 @@ #include "dsi_cfg.h" static const char * const dsi_v2_bus_clk_names[] = { - "core_mmss_clk", "iface_clk", "bus_clk", + "core_mmss", "iface", "bus", }; static const struct msm_dsi_config apq8064_dsi_cfg = { @@ -34,7 +34,7 @@ static const struct msm_dsi_config apq8064_dsi_cfg = { }; static const char * const dsi_6g_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", "core_mmss_clk", + "mdp_core", "iface", "bus", "core_mmss", }; static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { @@ -55,7 +55,7 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { }; static const char * const dsi_8916_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", + "mdp_core", "iface", "bus", }; static const struct msm_dsi_config msm8916_dsi_cfg = { @@ -99,7 +99,7 @@ static const struct msm_dsi_config msm8994_dsi_cfg = { * without it too. 
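The hunks below drop the redundant _clk suffix from the clock names the driver requests. Old device trees presumably keep working because the msm_clk_get() helper these call sites switch to (not shown in this diff) is expected to try the new name first and then fall back to the legacy <name>_clk spelling; a toy version of that two-step lookup against an invented in-memory device tree:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Pretend DT: one node still uses the legacy suffix, one the new name */
static const char *dt_clock_names[] = { "iface_clk", "bus" };

static int dt_has_clock(const char *name)
{
        for (size_t i = 0; i < sizeof(dt_clock_names) / sizeof(*dt_clock_names); i++)
                if (!strcmp(dt_clock_names[i], name))
                        return 1;
        return 0;
}

/* Try the new-style name first, then "<name>_clk" for old device trees */
static void get_clock(const char *name)
{
        char legacy[64];

        if (dt_has_clock(name)) {
                printf("got '%s'\n", name);
                return;
        }
        snprintf(legacy, sizeof(legacy), "%s_clk", name);
        if (dt_has_clock(legacy))
                printf("got '%s' (legacy name; DT should be updated)\n", legacy);
        else
                printf("no clock '%s'\n", name);
}

int main(void)
{
        get_clock("iface"); /* found via legacy "iface_clk" */
        get_clock("bus");   /* found directly */
        return 0;
}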
Figure out why it doesn't enable and uncomment below */ static const char * const dsi_8996_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", /* "core_mmss_clk", */ + "mdp_core", "iface", "bus", /* "core_mmss", */ }; static const struct msm_dsi_config msm8996_dsi_cfg = { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index dbb31a014419..0f7324a686ca 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -248,7 +248,7 @@ disable_clks: clk_disable_unprepare(ahb_clk); disable_gdsc: regulator_disable(gdsc_reg); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); put_clk: clk_put(ahb_clk); put_gdsc: @@ -334,46 +334,46 @@ static int dsi_regulator_init(struct msm_dsi_host *msm_host) static int dsi_clk_init(struct msm_dsi_host *msm_host) { - struct device *dev = &msm_host->pdev->dev; + struct platform_device *pdev = msm_host->pdev; const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; const struct msm_dsi_config *cfg = cfg_hnd->cfg; int i, ret = 0; /* get bus clocks */ for (i = 0; i < cfg->num_bus_clks; i++) { - msm_host->bus_clks[i] = devm_clk_get(dev, + msm_host->bus_clks[i] = msm_clk_get(pdev, cfg->bus_clk_names[i]); if (IS_ERR(msm_host->bus_clks[i])) { ret = PTR_ERR(msm_host->bus_clks[i]); - pr_err("%s: Unable to get %s, ret = %d\n", + pr_err("%s: Unable to get %s clock, ret = %d\n", __func__, cfg->bus_clk_names[i], ret); goto exit; } } /* get link and source clocks */ - msm_host->byte_clk = devm_clk_get(dev, "byte_clk"); + msm_host->byte_clk = msm_clk_get(pdev, "byte"); if (IS_ERR(msm_host->byte_clk)) { ret = PTR_ERR(msm_host->byte_clk); - pr_err("%s: can't find dsi_byte_clk. ret=%d\n", + pr_err("%s: can't find dsi_byte clock. ret=%d\n", __func__, ret); msm_host->byte_clk = NULL; goto exit; } - msm_host->pixel_clk = devm_clk_get(dev, "pixel_clk"); + msm_host->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(msm_host->pixel_clk)) { ret = PTR_ERR(msm_host->pixel_clk); - pr_err("%s: can't find dsi_pixel_clk. ret=%d\n", + pr_err("%s: can't find dsi_pixel clock. ret=%d\n", __func__, ret); msm_host->pixel_clk = NULL; goto exit; } - msm_host->esc_clk = devm_clk_get(dev, "core_clk"); + msm_host->esc_clk = msm_clk_get(pdev, "core"); if (IS_ERR(msm_host->esc_clk)) { ret = PTR_ERR(msm_host->esc_clk); - pr_err("%s: can't find dsi_esc_clk. ret=%d\n", + pr_err("%s: can't find dsi_esc clock. ret=%d\n", __func__, ret); msm_host->esc_clk = NULL; goto exit; @@ -382,22 +382,22 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk); if (!msm_host->byte_clk_src) { ret = -ENODEV; - pr_err("%s: can't find byte_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find byte_clk clock. ret=%d\n", __func__, ret); goto exit; } msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk); if (!msm_host->pixel_clk_src) { ret = -ENODEV; - pr_err("%s: can't find pixel_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find pixel_clk clock. ret=%d\n", __func__, ret); goto exit; } if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { - msm_host->src_clk = devm_clk_get(dev, "src_clk"); + msm_host->src_clk = msm_clk_get(pdev, "src"); if (IS_ERR(msm_host->src_clk)) { ret = PTR_ERR(msm_host->src_clk); - pr_err("%s: can't find dsi_src_clk. ret=%d\n", + pr_err("%s: can't find src clock. 
ret=%d\n", __func__, ret); msm_host->src_clk = NULL; goto exit; @@ -406,7 +406,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->esc_clk_src = clk_get_parent(msm_host->esc_clk); if (!msm_host->esc_clk_src) { ret = -ENODEV; - pr_err("%s: can't get esc_clk_src. ret=%d\n", + pr_err("%s: can't get esc clock parent. ret=%d\n", __func__, ret); goto exit; } @@ -414,7 +414,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->dsi_clk_src = clk_get_parent(msm_host->src_clk); if (!msm_host->dsi_clk_src) { ret = -ENODEV; - pr_err("%s: can't get dsi_clk_src. ret=%d\n", + pr_err("%s: can't get src clock parent. ret=%d\n", __func__, ret); } } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..790ca280cbfd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -482,7 +482,7 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) goto fail; } - phy->ahb_clk = devm_clk_get(dev, "iface_clk"); + phy->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(phy->ahb_clk)) { dev_err(dev, "%s: Unable to get ahb clk\n", __func__); ret = PTR_ERR(phy->ahb_clk); diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index e32a4a4f3797..7c72264101ff 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -150,46 +150,46 @@ static const struct edp_pixel_clk_div clk_divs[2][EDP_PIXEL_CLK_NUM] = { static int edp_clk_init(struct edp_ctrl *ctrl) { - struct device *dev = &ctrl->pdev->dev; + struct platform_device *pdev = ctrl->pdev; int ret; - ctrl->aux_clk = devm_clk_get(dev, "core_clk"); + ctrl->aux_clk = msm_clk_get(pdev, "core"); if (IS_ERR(ctrl->aux_clk)) { ret = PTR_ERR(ctrl->aux_clk); - pr_err("%s: Can't find aux_clk, %d\n", __func__, ret); + pr_err("%s: Can't find core clock, %d\n", __func__, ret); ctrl->aux_clk = NULL; return ret; } - ctrl->pixel_clk = devm_clk_get(dev, "pixel_clk"); + ctrl->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(ctrl->pixel_clk)) { ret = PTR_ERR(ctrl->pixel_clk); - pr_err("%s: Can't find pixel_clk, %d\n", __func__, ret); + pr_err("%s: Can't find pixel clock, %d\n", __func__, ret); ctrl->pixel_clk = NULL; return ret; } - ctrl->ahb_clk = devm_clk_get(dev, "iface_clk"); + ctrl->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(ctrl->ahb_clk)) { ret = PTR_ERR(ctrl->ahb_clk); - pr_err("%s: Can't find ahb_clk, %d\n", __func__, ret); + pr_err("%s: Can't find iface clock, %d\n", __func__, ret); ctrl->ahb_clk = NULL; return ret; } - ctrl->link_clk = devm_clk_get(dev, "link_clk"); + ctrl->link_clk = msm_clk_get(pdev, "link"); if (IS_ERR(ctrl->link_clk)) { ret = PTR_ERR(ctrl->link_clk); - pr_err("%s: Can't find link_clk, %d\n", __func__, ret); + pr_err("%s: Can't find link clock, %d\n", __func__, ret); ctrl->link_clk = NULL; return ret; } /* need mdp core clock to receive irq */ - ctrl->mdp_core_clk = devm_clk_get(dev, "mdp_core_clk"); + ctrl->mdp_core_clk = msm_clk_get(pdev, "mdp_core"); if (IS_ERR(ctrl->mdp_core_clk)) { ret = PTR_ERR(ctrl->mdp_core_clk); - pr_err("%s: Can't find mdp_core_clk, %d\n", __func__, ret); + pr_err("%s: Can't find mdp_core clock, %d\n", __func__, ret); ctrl->mdp_core_clk = NULL; return ret; } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 17e069a133a4..e63dc0fb55f8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -208,7 +208,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) 
for (i = 0; i < config->hpd_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->hpd_clk_names[i]); + clk = msm_clk_get(pdev, config->hpd_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get hpd clk: %s (%d)\n", @@ -228,7 +228,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) for (i = 0; i < config->pwr_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->pwr_clk_names[i]); + clk = msm_clk_get(pdev, config->pwr_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get pwr clk: %s (%d)\n", @@ -361,7 +361,7 @@ static const char *hpd_reg_names_none[] = {}; static struct hdmi_platform_config hdmi_tx_8660_config; static const char *hpd_reg_names_8960[] = {"core-vdda", "hdmi-mux"}; -static const char *hpd_clk_names_8960[] = {"core_clk", "master_iface_clk", "slave_iface_clk"}; +static const char *hpd_clk_names_8960[] = {"core", "master_iface", "slave_iface"}; static struct hdmi_platform_config hdmi_tx_8960_config = { HDMI_CFG(hpd_reg, 8960), @@ -370,8 +370,8 @@ static struct hdmi_platform_config hdmi_tx_8960_config = { static const char *pwr_reg_names_8x74[] = {"core-vdda", "core-vcc"}; static const char *hpd_reg_names_8x74[] = {"hpd-gdsc", "hpd-5v"}; -static const char *pwr_clk_names_8x74[] = {"extp_clk", "alt_iface_clk"}; -static const char *hpd_clk_names_8x74[] = {"iface_clk", "core_clk", "mdp_core_clk"}; +static const char *pwr_clk_names_8x74[] = {"extp", "alt_iface"}; +static const char *hpd_clk_names_8x74[] = {"iface", "core", "mdp_core"}; static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0}; static struct hdmi_platform_config hdmi_tx_8974_config = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 534ce5b49781..5e631392dc85 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -48,7 +48,7 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) for (i = 0; i < cfg->num_clks; i++) { struct clk *clk; - clk = devm_clk_get(dev, cfg->clk_names[i]); + clk = msm_clk_get(phy->pdev, cfg->clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(dev, "failed to get phy clock: %s (%d)\n", diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c index e6ee6b745ab7..0980da8ec966 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c @@ -48,7 +48,7 @@ static const char * const hdmi_phy_8960_reg_names[] = { }; static const char * const hdmi_phy_8960_clk_names[] = { - "slave_iface_clk", + "slave_iface", }; const struct hdmi_phy_cfg msm_hdmi_phy_8960_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 1fb7645cc721..0df504c61833 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -758,9 +758,7 @@ static const char * const hdmi_phy_8996_reg_names[] = { }; static const char * const hdmi_phy_8996_clk_names[] = { - "mmagic_iface_clk", - "iface_clk", - "ref_clk", + "iface", "ref", }; const struct hdmi_phy_cfg msm_hdmi_phy_8996_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c index c4a61e537851..4a8b8468586a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c @@ -41,8 +41,7 @@ static const char * const hdmi_phy_8x74_reg_names[] = { }; static const char * const hdmi_phy_8x74_clk_names[] = { - 
"iface_clk", - "alt_iface_clk" + "iface", "alt_iface" }; const struct hdmi_phy_cfg msm_hdmi_phy_8x74_cfg = { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 47fa2aba1983..14bd3bd3e040 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -290,6 +290,9 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp4_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err); mdp4_disable(mdp4_kms); @@ -308,6 +311,10 @@ static void mdp4_crtc_atomic_enable(struct drm_crtc *crtc, return; mdp4_enable(mdp4_kms); + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp_irq_register(&mdp4_kms->base, &mdp4_crtc->err); crtc_flush(crtc); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index c2bdad88447e..824067d2d427 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -83,6 +83,8 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .caps = MDP_LM_CAP_WB }, }, .nb_stages = 5, + .max_width = 2048, + .max_height = 0xFFFF, }, .dspp = { .count = 3, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c index 60790df91bfa..1abc7f5c345c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c @@ -224,7 +224,7 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder, mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER, MDP5_SPLIT_DPL_LOWER_SMART_PANEL); mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 6fcb58ab718c..e414850dbbda 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -55,18 +55,23 @@ struct mdp5_crtc { struct completion pp_completion; + bool lm_cursor_enabled; + struct { /* protect REG_MDP5_LM_CURSOR* registers and cursor scanout_bo*/ spinlock_t lock; /* current cursor being scanned out: */ struct drm_gem_object *scanout_bo; + uint64_t iova; uint32_t width, height; uint32_t x, y; } cursor; }; #define to_mdp5_crtc(x) container_of(x, struct mdp5_crtc, base) +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc); + static struct mdp5_kms *get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv = crtc->dev->dev_private; @@ -114,6 +119,8 @@ static u32 crtc_flush_all(struct drm_crtc *crtc) return 0; drm_atomic_crtc_for_each_plane(plane, crtc) { + if (!plane->state->visible) + continue; flush_mask |= mdp5_plane_get_flush(plane); } @@ -242,6 +249,9 @@ static void blend_setup(struct drm_crtc *crtc) drm_atomic_crtc_for_each_plane(plane, crtc) { enum mdp5_pipe right_pipe; + if (!plane->state->visible) + continue; + pstate = to_mdp5_plane_state(plane->state); pstates[pstate->stage] = pstate; stage[pstate->stage][PIPE_LEFT] = mdp5_plane_pipe(plane); @@ -422,11 +432,14 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp5_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + if (mdp5_cstate->cmd_mode) mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->pp_done); mdp_irq_unregister(&mdp5_kms->base, 
&mdp5_crtc->err); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); mdp5_crtc->enabled = false; } @@ -446,6 +459,29 @@ static void mdp5_crtc_atomic_enable(struct drm_crtc *crtc, pm_runtime_get_sync(dev); + if (mdp5_crtc->lm_cursor_enabled) { + /* + * Restore LM cursor state, as it might have been lost + * with suspend: + */ + if (mdp5_crtc->cursor.iova) { + unsigned long flags; + + spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); + mdp5_crtc_restore_cursor(crtc); + spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); + + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, true); + } else { + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, false); + } + } + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp5_crtc_mode_set_nofb(crtc); mdp_irq_register(&mdp5_kms->base, &mdp5_crtc->err); @@ -580,6 +616,9 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, DBG("%s: check", crtc->name); drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) { + if (!pstate->visible) + continue; + pstates[cnt].plane = plane; pstates[cnt].state = to_mdp5_plane_state(pstate); @@ -723,6 +762,50 @@ static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h) mdp5_crtc->cursor.y); } +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc) +{ + struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); + struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + struct mdp5_kms *mdp5_kms = get_kms(crtc); + const enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + uint32_t blendcfg, stride; + uint32_t x, y, width, height; + uint32_t roi_w, roi_h; + int lm; + + assert_spin_locked(&mdp5_crtc->cursor.lock); + + lm = mdp5_cstate->pipeline.mixer->lm; + + x = mdp5_crtc->cursor.x; + y = mdp5_crtc->cursor.y; + width = mdp5_crtc->cursor.width; + height = mdp5_crtc->cursor.height; + + stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); + + get_roi(crtc, &roi_w, &roi_h); + + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), + MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), + MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | + MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), + MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | + MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), + MDP5_LM_CURSOR_START_XY_Y_START(y) | + MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), + mdp5_crtc->cursor.iova); + + blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; + blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); +} + static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file, uint32_t handle, uint32_t width, uint32_t height) @@ -735,16 +818,18 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct platform_device *pdev = mdp5_kms->pdev; struct msm_kms *kms = &mdp5_kms->base.base; struct drm_gem_object *cursor_bo, *old_bo = NULL; - uint32_t blendcfg, stride; - uint64_t cursor_addr; struct mdp5_ctl *ctl; - int ret, lm; - enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + int ret; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); - uint32_t roi_w, roi_h; bool cursor_enable = true; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + 
dev_warn(dev->dev, + "cursor_set is deprecated with cursor planes\n"); + return -EINVAL; + } + if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) { dev_err(dev->dev, "bad cursor size: %dx%d\n", width, height); return -EINVAL; @@ -761,6 +846,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!handle) { DBG("Cursor off"); cursor_enable = false; + mdp5_crtc->cursor.iova = 0; pm_runtime_get_sync(&pdev->dev); goto set_cursor; } @@ -769,13 +855,11 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!cursor_bo) return -ENOENT; - ret = msm_gem_get_iova(cursor_bo, kms->aspace, &cursor_addr); + ret = msm_gem_get_iova(cursor_bo, kms->aspace, + &mdp5_crtc->cursor.iova); if (ret) return -EINVAL; - lm = mdp5_cstate->pipeline.mixer->lm; - stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); - pm_runtime_get_sync(&pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); @@ -785,27 +869,10 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, mdp5_crtc->cursor.width = width; mdp5_crtc->cursor.height = height; - get_roi(crtc, &roi_w, &roi_h); - - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), - MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), - MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | - MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), cursor_addr); - - blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; - blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); - pm_runtime_put_autosuspend(&pdev->dev); - set_cursor: ret = mdp5_ctl_set_cursor(ctl, pipeline, 0, cursor_enable); if (ret) { @@ -817,7 +884,7 @@ set_cursor: crtc_flush(crtc, flush_mask); end: - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); if (old_bo) { drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo); /* enable vblank to complete cursor work: */ @@ -831,12 +898,18 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) struct mdp5_kms *mdp5_kms = get_kms(crtc); struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); - uint32_t lm = mdp5_cstate->pipeline.mixer->lm; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); + struct drm_device *dev = crtc->dev; uint32_t roi_w; uint32_t roi_h; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + dev_warn(dev->dev, + "cursor_move is deprecated with cursor planes\n"); + return -EINVAL; + } + /* don't support LM cursors when we have source split enabled */ if (mdp5_cstate->pipeline.r_mixer) return -EINVAL; @@ -853,17 +926,12 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) pm_runtime_get_sync(&mdp5_kms->pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), - MDP5_LM_CURSOR_START_XY_Y_START(y) | - MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); crtc_flush(crtc, flush_mask); -
pm_runtime_put_autosuspend(&mdp5_kms->pdev->dev); + pm_runtime_put_sync(&mdp5_kms->pdev->dev); return 0; } @@ -943,16 +1011,6 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = { .atomic_print_state = mdp5_crtc_atomic_print_state, }; -static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { - .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, - .reset = mdp5_crtc_reset, - .atomic_duplicate_state = mdp5_crtc_duplicate_state, - .atomic_destroy_state = mdp5_crtc_destroy_state, - .atomic_print_state = mdp5_crtc_atomic_print_state, -}; - static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = { .mode_set_nofb = mdp5_crtc_mode_set_nofb, .atomic_check = mdp5_crtc_atomic_check, @@ -1121,12 +1179,10 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->err.irq = mdp5_crtc_err_irq; mdp5_crtc->pp_done.irq = mdp5_crtc_pp_done_irq; - if (cursor_plane) - drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, - &mdp5_crtc_no_lm_cursor_funcs, NULL); - else - drm_crtc_init_with_planes(dev, crtc, plane, NULL, - &mdp5_crtc_funcs, NULL); + mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; + + drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, + &mdp5_crtc_funcs, NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index 5b851380d3f2..36ad3cbe5f79 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -384,7 +384,7 @@ int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder, mdp5_ctl_pair(mdp5_encoder->ctl, mdp5_slave_enc->ctl, true); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index bb5deb00c899..280e368bc9bb 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -54,7 +54,7 @@ void mdp5_irq_preinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } int mdp5_irq_postinstall(struct msm_kms *kms) @@ -72,7 +72,7 @@ int mdp5_irq_postinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp_irq_register(mdp_kms, error_handler); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } @@ -84,7 +84,7 @@ void mdp5_irq_uninstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } irqreturn_t mdp5_irq(struct msm_kms *kms) @@ -119,7 +119,7 @@ int mdp5_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), true); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } @@ -132,5 +132,5 @@ void mdp5_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), false); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 7e829a8d1cb1..3e9bba4d6624 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ 
b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -125,7 +125,7 @@ static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *s if (mdp5_kms->smp) mdp5_smp_complete_commit(mdp5_kms->smp, &mdp5_kms->state->smp); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } static void mdp5_wait_for_crtc_commit_done(struct msm_kms *kms, @@ -496,12 +496,12 @@ static void read_mdp_hw_revision(struct mdp5_kms *mdp5_kms, pm_runtime_get_sync(dev); version = mdp5_read(mdp5_kms, REG_MDP5_HW_VERSION); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); *major = FIELD(version, MDP5_HW_VERSION_MAJOR); *minor = FIELD(version, MDP5_HW_VERSION_MINOR); - DBG("MDP5 version v%d.%d", *major, *minor); + dev_info(dev, "MDP5 version v%d.%d", *major, *minor); } static int get_clk(struct platform_device *pdev, struct clk **clkp, @@ -683,7 +683,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) aspace = NULL;; } - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); ret = modeset_init(mdp5_kms); if (ret) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c index 2bfac3712685..ff52c49095f9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c @@ -17,19 +17,20 @@ #include "mdp5_kms.h" -struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, - struct drm_plane *plane, uint32_t caps, uint32_t blkcfg) +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); struct mdp5_state *state; struct mdp5_hw_pipe_state *old_state, *new_state; - struct mdp5_hw_pipe *hwpipe = NULL; - int i; + int i, j; state = mdp5_get_state(s); if (IS_ERR(state)) - return ERR_CAST(state); + return PTR_ERR(state); /* grab old_state after mdp5_get_state(), since now we hold lock: */ old_state = &mdp5_kms->state->hwpipe; @@ -64,31 +65,67 @@ struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, /* possible candidate, take the one with the * fewest unneeded caps bits set: */ - if (!hwpipe || (hweight_long(cur->caps & ~caps) < - hweight_long(hwpipe->caps & ~caps))) - hwpipe = cur; + if (!(*hwpipe) || (hweight_long(cur->caps & ~caps) < + hweight_long((*hwpipe)->caps & ~caps))) { + bool r_found = false; + + if (r_hwpipe) { + for (j = i + 1; j < mdp5_kms->num_hwpipes; + j++) { + struct mdp5_hw_pipe *r_cur = + mdp5_kms->hwpipes[j]; + + /* reject different types of hwpipes */ + if (r_cur->caps != cur->caps) + continue; + + /* respect priority, eg. 
VIG0 > VIG1 */ + if (cur->pipe > r_cur->pipe) + continue; + + *r_hwpipe = r_cur; + r_found = true; + break; + } + } + + if (!r_hwpipe || r_found) + *hwpipe = cur; + } } - if (!hwpipe) - return ERR_PTR(-ENOMEM); + if (!(*hwpipe)) + return -ENOMEM; + + if (r_hwpipe && !(*r_hwpipe)) + return -ENOMEM; if (mdp5_kms->smp) { int ret; - DBG("%s: alloc SMP blocks", hwpipe->name); + /* We don't support SMP and 2 hwpipes/plane together */ + WARN_ON(r_hwpipe); + + DBG("%s: alloc SMP blocks", (*hwpipe)->name); ret = mdp5_smp_assign(mdp5_kms->smp, &state->smp, - hwpipe->pipe, blkcfg); + (*hwpipe)->pipe, blkcfg); if (ret) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - hwpipe->blkcfg = blkcfg; + (*hwpipe)->blkcfg = blkcfg; } DBG("%s: assign to plane %s for caps %x", - hwpipe->name, plane->name, caps); - new_state->hwpipe_to_plane[hwpipe->idx] = plane; + (*hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*hwpipe)->idx] = plane; - return hwpipe; + if (r_hwpipe) { + DBG("%s: assign to right of plane %s for caps %x", + (*r_hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*r_hwpipe)->idx] = plane; + } + + return 0; } void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h index 924c3e6f9517..bb2b0ac7aa2b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h @@ -44,9 +44,10 @@ struct mdp5_hw_pipe_state { struct drm_plane *hwpipe_to_plane[SSPP_MAX]; }; -struct mdp5_hw_pipe *__must_check -mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, - uint32_t caps, uint32_t blkcfg); +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe); void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 4b22ac3413a1..be50445f9901 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -31,15 +31,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_rect *src, struct drm_rect *dest); -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx); - static struct mdp5_kms *get_kms(struct drm_plane *plane) { struct msm_drm_private *priv = plane->dev->dev_private; @@ -254,18 +245,6 @@ static const struct drm_plane_funcs mdp5_plane_funcs = { .atomic_print_state = mdp5_plane_atomic_print_state, }; -static const struct drm_plane_funcs mdp5_cursor_plane_funcs = { - .update_plane = mdp5_update_cursor_plane_legacy, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = mdp5_plane_destroy, - .atomic_set_property = mdp5_plane_atomic_set_property, - .atomic_get_property = mdp5_plane_atomic_get_property, - .reset = mdp5_plane_reset, - .atomic_duplicate_state = mdp5_plane_duplicate_state, - .atomic_destroy_state = mdp5_plane_destroy_state, - .atomic_print_state = mdp5_plane_atomic_print_state, -}; - static int mdp5_plane_prepare_fb(struct drm_plane *plane, struct 
drm_plane_state *new_state) { @@ -414,31 +393,30 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, struct mdp5_hw_pipe *old_hwpipe = mdp5_state->hwpipe; struct mdp5_hw_pipe *old_right_hwpipe = mdp5_state->r_hwpipe; - - mdp5_state->hwpipe = mdp5_pipe_assign(state->state, - plane, caps, blkcfg); - if (IS_ERR(mdp5_state->hwpipe)) { - DBG("%s: failed to assign hwpipe!", plane->name); - return PTR_ERR(mdp5_state->hwpipe); + struct mdp5_hw_pipe *new_hwpipe = NULL; + struct mdp5_hw_pipe *new_right_hwpipe = NULL; + + ret = mdp5_pipe_assign(state->state, plane, caps, + blkcfg, &new_hwpipe, + need_right_hwpipe ? + &new_right_hwpipe : NULL); + if (ret) { + DBG("%s: failed to assign hwpipe(s)!", + plane->name); + return ret; } - if (need_right_hwpipe) { - mdp5_state->r_hwpipe = - mdp5_pipe_assign(state->state, plane, - caps, blkcfg); - if (IS_ERR(mdp5_state->r_hwpipe)) { - DBG("%s: failed to assign right hwpipe", - plane->name); - return PTR_ERR(mdp5_state->r_hwpipe); - } - } else { + mdp5_state->hwpipe = new_hwpipe; + if (need_right_hwpipe) + mdp5_state->r_hwpipe = new_right_hwpipe; + else /* * set it to NULL so that the driver knows we * don't have a right hwpipe when committing a * new state */ mdp5_state->r_hwpipe = NULL; - } + mdp5_pipe_release(state->state, old_hwpipe); mdp5_pipe_release(state->state, old_right_hwpipe); @@ -487,11 +465,98 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane, } } +static int mdp5_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct mdp5_plane_state *mdp5_state = to_mdp5_plane_state(state); + struct drm_crtc_state *crtc_state; + struct drm_rect clip; + int min_scale, max_scale; + int ret; + + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + if (WARN_ON(!crtc_state)) + return -EINVAL; + + if (!crtc_state->active) + return -EINVAL; + + mdp5_state = to_mdp5_plane_state(state); + + /* don't use fast path if we don't have a hwpipe allocated yet */ + if (!mdp5_state->hwpipe) + return -EINVAL; + + /* only allow changing of position (crtc x/y or src x/y) in fast path */ + if (plane->state->crtc != state->crtc || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h || + plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + !plane->state->fb || + plane->state->fb != state->fb) + return -EINVAL; + + clip.x1 = 0; + clip.y1 = 0; + clip.x2 = crtc_state->adjusted_mode.hdisplay; + clip.y2 = crtc_state->adjusted_mode.vdisplay; + min_scale = FRAC_16_16(1, 8); + max_scale = FRAC_16_16(8, 1); + + ret = drm_plane_helper_check_state(state, &clip, min_scale, + max_scale, true, true); + if (ret) + return ret; + + /* + * if the visibility of the plane changes (i.e., if the cursor is + * clipped out completely), we can't take the async path because + * we need to stage/unstage the plane from the Layer Mixer(s). We + * also assign/unassign the hwpipe(s) tied to the plane. We avoid + * taking the fast path for both these reasons. 
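+ *
+ * (Editorial sketch, not part of the commit, assuming the 4.14-era
+ * atomic helpers: a legacy cursor move enters
+ * drm_atomic_helper_update_plane(); drm_atomic_helper_check() - which
+ * msm now uses directly, see msm_drv.c below - runs this
+ * atomic_async_check() hook, and if it returns 0 the state is
+ * flagged state->async_update, which msm_atomic_commit()
+ * short-circuits into drm_atomic_helper_async_commit() and thus
+ * into mdp5_plane_atomic_async_update() below.)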
+ */ + if (state->visible != plane->state->visible) + return -EINVAL; + + return 0; +} + +static void mdp5_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + + if (plane_enabled(new_state)) { + struct mdp5_ctl *ctl; + struct mdp5_pipeline *pipeline = + mdp5_crtc_get_pipeline(plane->crtc); + int ret; + + ret = mdp5_plane_mode_set(plane, new_state->crtc, new_state->fb, + &new_state->src, &new_state->dst); + WARN_ON(ret < 0); + + ctl = mdp5_crtc_get_ctl(new_state->crtc); + + mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); + } + + *to_mdp5_plane_state(plane->state) = + *to_mdp5_plane_state(new_state); +} + static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { .prepare_fb = mdp5_plane_prepare_fb, .cleanup_fb = mdp5_plane_cleanup_fb, .atomic_check = mdp5_plane_atomic_check, .atomic_update = mdp5_plane_atomic_update, + .atomic_async_check = mdp5_plane_atomic_async_check, + .atomic_async_update = mdp5_plane_atomic_async_update, }; static void set_scanout_locked(struct mdp5_kms *mdp5_kms, @@ -996,84 +1061,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, return ret; } -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state, *new_plane_state; - struct mdp5_plane_state *mdp5_pstate; - struct drm_crtc_state *crtc_state = crtc->state; - int ret; - - if (!crtc_state->active || drm_atomic_crtc_needs_modeset(crtc_state)) - goto slow; - - plane_state = plane->state; - mdp5_pstate = to_mdp5_plane_state(plane_state); - - /* don't use fast path if we don't have a hwpipe allocated yet */ - if (!mdp5_pstate->hwpipe) - goto slow; - - /* only allow changing of position(crtc x/y or src x/y) in fast path */ - if (plane_state->crtc != crtc || - plane_state->src_w != src_w || - plane_state->src_h != src_h || - plane_state->crtc_w != crtc_w || - plane_state->crtc_h != crtc_h || - !plane_state->fb || - plane_state->fb != fb) - goto slow; - - new_plane_state = mdp5_plane_duplicate_state(plane); - if (!new_plane_state) - return -ENOMEM; - - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; - - ret = mdp5_plane_atomic_check_with_state(crtc_state, new_plane_state); - if (ret) - goto slow_free; - - if (new_plane_state->visible) { - struct mdp5_ctl *ctl; - struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(crtc); - - ret = mdp5_plane_mode_set(plane, crtc, fb, - &new_plane_state->src, - &new_plane_state->dst); - WARN_ON(ret < 0); - - ctl = mdp5_crtc_get_ctl(crtc); - - mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); - } - - *to_mdp5_plane_state(plane_state) = - *to_mdp5_plane_state(new_plane_state); - - mdp5_plane_destroy_state(plane, new_plane_state); - - return 0; -slow_free: - mdp5_plane_destroy_state(plane, new_plane_state); -slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h, 
ctx); -} - /* * Use this func and the one below only after the atomic state has been * successfully swapped @@ -1133,16 +1120,9 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats, ARRAY_SIZE(mdp5_plane->formats), false); - if (type == DRM_PLANE_TYPE_CURSOR) - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_cursor_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); - else - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); + ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, + mdp5_plane->formats, mdp5_plane->nformats, + NULL, type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 025d454163b0..bf5f8c39f34d 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -146,35 +146,6 @@ static void commit_worker(struct work_struct *work) complete_commit(container_of(work, struct msm_commit, work), true); } -/* - * this func is identical to the drm_atomic_helper_check, but we keep this - * because we might eventually need to have a more finegrained check - * sequence without using the atomic helpers. - * - * In the past, we first called drm_atomic_helper_check_planes, and then - * drm_atomic_helper_check_modeset. We needed this because the MDP5 plane's - * ->atomic_check could update ->mode_changed for pixel format changes. - * This, however isn't needed now because if there is a pixel format change, - * we just assign a new hwpipe for it with a new SMP allocation. We might - * eventually hit a condition where we would need to do a full modeset if - * we run out of planes. There, we'd probably need to set mode_changed. - */ -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} - /** * drm_atomic_helper_commit - commit validated state object * @dev: DRM device @@ -202,6 +173,18 @@ int msm_atomic_commit(struct drm_device *dev, if (ret) return ret; + /* + * Note that plane->atomic_async_check() should fail if we need + * to re-assign hwpipe or anything that touches global atomic + * state, so we'll never go down the async update path in those + * cases. + */ + if (state->async_update) { + drm_atomic_helper_async_commit(dev, state); + drm_atomic_helper_cleanup_planes(dev, state); + return 0; + } + c = commit_init(state); if (!c) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 606df7bea97b..0a3ea3034e39 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -29,9 +29,12 @@ * - 1.0.0 - initial interface * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers * - 1.2.0 - adds explicit fence support for submit ioctl + * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW + + * SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for + * MSM_GEM_INFO ioctl. 
*/ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 2 +#define MSM_VERSION_MINOR 3 #define MSM_VERSION_PATCHLEVEL 0 static void msm_fb_output_poll_changed(struct drm_device *dev) @@ -44,7 +47,7 @@ static void msm_fb_output_poll_changed(struct drm_device *dev) static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = msm_framebuffer_create, .output_poll_changed = msm_fb_output_poll_changed, - .atomic_check = msm_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = msm_atomic_commit, .atomic_state_alloc = msm_atomic_state_alloc, .atomic_state_clear = msm_atomic_state_clear, @@ -211,7 +214,6 @@ static int msm_drm_uninit(struct device *dev) struct drm_device *ddev = platform_get_drvdata(pdev); struct msm_drm_private *priv = ddev->dev_private; struct msm_kms *kms = priv->kms; - struct msm_gpu *gpu = priv->gpu; struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl; struct vblank_event *vbl_ev, *tmp; @@ -253,15 +255,6 @@ static int msm_drm_uninit(struct device *dev) if (kms && kms->funcs) kms->funcs->destroy(kms); - if (gpu) { - mutex_lock(&ddev->struct_mutex); - // XXX what do we do here? - //pm_runtime_enable(&pdev->dev); - gpu->funcs->pm_suspend(gpu); - mutex_unlock(&ddev->struct_mutex); - gpu->funcs->destroy(gpu); - } - if (priv->vram.paddr) { unsigned long attrs = DMA_ATTR_NO_KERNEL_MAPPING; drm_mm_takedown(&priv->vram.mm); @@ -514,24 +507,37 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int msm_open(struct drm_device *dev, struct drm_file *file) +static int context_init(struct drm_device *dev, struct drm_file *file) { struct msm_file_private *ctx; - /* For now, load gpu on open.. to avoid the requirement of having - * firmware in the initrd. - */ - load_gpu(dev); - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + msm_submitqueue_init(dev, ctx); + file->driver_priv = ctx; return 0; } +static int msm_open(struct drm_device *dev, struct drm_file *file) +{ + /* For now, load gpu on open.. to avoid the requirement of having + * firmware in the initrd. 
+ */ + load_gpu(dev); + + return context_init(dev, file); +} + +static void context_close(struct msm_file_private *ctx) +{ + msm_submitqueue_close(ctx); + kfree(ctx); +} + static void msm_postclose(struct drm_device *dev, struct drm_file *file) { struct msm_drm_private *priv = dev->dev_private; @@ -542,7 +548,7 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) priv->lastctx = NULL; mutex_unlock(&dev->struct_mutex); - kfree(ctx); + context_close(ctx); } static void msm_lastclose(struct drm_device *dev) @@ -737,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_wait_fence *args = data; ktime_t timeout = to_ktime(args->timeout); + struct msm_gpu_submitqueue *queue; + struct msm_gpu *gpu = priv->gpu; + int ret; if (args->pad) { DRM_ERROR("invalid pad: %08x\n", args->pad); return -EINVAL; } - if (!priv->gpu) + if (!gpu) return 0; - return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, + true); + + msm_submitqueue_put(queue); + return ret; } static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, @@ -787,6 +804,28 @@ unlock: return ret; } + +static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_submitqueue *args = data; + + if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) + return -EINVAL; + + return msm_submitqueue_create(dev, file->driver_priv, args->prio, + args->flags, &args->id); +} + + +static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data, + struct drm_file *file) +{ + u32 id = *(u32 *) data; + + return msm_submitqueue_remove(file->driver_priv, id); +} + static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_AUTH|DRM_RENDER_ALLOW), @@ -796,6 +835,8 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT, msm_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE, msm_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE, msm_ioctl_gem_madvise, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct vm_operations_struct vm_ops = { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 5e8109c07560..c646843d8822 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -56,11 +56,9 @@ struct msm_gem_address_space; struct msm_gem_vma; struct msm_file_private { - /* currently we don't do anything useful with this.. but when - * per-context address spaces are supported we'd keep track of - * the context's page-tables here. 
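(For illustration only - not part of the patch: userspace would drive the two new ioctls roughly as below. struct drm_msm_submitqueue and the ioctl numbers come from the uapi msm_drm.h additions elsewhere in this series, so treat those names as assumed here.

	#include <stdint.h>
	#include <xf86drm.h>
	#include "msm_drm.h"

	static int open_queue(int fd, uint32_t prio, uint32_t *id)
	{
		struct drm_msm_submitqueue req = {
			.flags = 0,
			.prio = prio,	/* selects gpu->rb[prio] */
		};

		if (drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req))
			return -1;

		*id = req.id;	/* id 0 is the pre-created default queue */
		return 0;
	}

	static int close_queue(int fd, uint32_t id)
	{
		return drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &id);
	}

The returned id is what drm_msm_gem_submit and drm_msm_wait_fence carry in their queueid field, matching the msm_submitqueue_get() lookups above.)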
- */ - int dummy; + rwlock_t queuelock; + struct list_head submitqueues; + int queueid; }; enum msm_mdp_plane_property { @@ -76,6 +74,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 4 + struct msm_drm_private { struct drm_device *dev; @@ -108,7 +108,8 @@ struct msm_drm_private { struct drm_fb_helper *fbdev; - struct msm_rd_state *rd; + struct msm_rd_state *rd; /* debugfs to dump all submits */ + struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf; /* list of GEM objects: */ @@ -154,20 +155,12 @@ struct msm_drm_private { struct shrinker shrinker; struct msm_vblank_ctrl vblank_ctrl; - - /* task holding struct_mutex.. currently only used in submit path - * to detect and reject faults from copy_from_user() for submit - * ioctl. - */ - struct task_struct *struct_mutex_task; }; struct msm_format { uint32_t pixel_format; }; -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state); int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); @@ -219,6 +212,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); void *msm_gem_get_vaddr(struct drm_gem_object *obj); +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); int msm_gem_sync_object(struct drm_gem_object *obj, @@ -303,7 +297,8 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); -void msm_rd_dump_submit(struct msm_gem_submit *submit); +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else @@ -319,6 +314,18 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); +struct msm_gpu_submitqueue; +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id); +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); +void msm_submitqueue_close(struct msm_file_private *ctx); + +void msm_submitqueue_destroy(struct kref *kref); + + #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) 
if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a2f89bac9c16..349c12f670eb 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) return ERR_PTR(-ENOMEM); fctx->dev = dev; - fctx->name = name; + strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 56061aa1959d..1aa6a4c6530c 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -22,7 +22,7 @@ struct msm_fence_context { struct drm_device *dev; - const char *name; + char name[32]; unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned fence */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f15821a0d900..81fe6d6740ce 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -470,14 +470,16 @@ fail: return ret; } -void *msm_gem_get_vaddr(struct drm_gem_object *obj) +static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret = 0; mutex_lock(&msm_obj->lock); - if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + if (WARN_ON(msm_obj->madv > madv)) { + dev_err(obj->dev->dev, "Invalid madv state: %u vs %u\n", + msm_obj->madv, madv); mutex_unlock(&msm_obj->lock); return ERR_PTR(-EBUSY); } @@ -513,6 +515,22 @@ fail: return ERR_PTR(ret); } +void *msm_gem_get_vaddr(struct drm_gem_object *obj) +{ + return get_vaddr(obj, MSM_MADV_WILLNEED); +} + +/* + * Don't use this! It is for the very special case of dumping + * submits from GPU hangs or faults, where the bo may already + * be MSM_MADV_DONTNEED, but we know the buffer is still on the + * active list. + */ +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj) +{ + return get_vaddr(obj, __MSM_MADV_PURGED); +} + void msm_gem_put_vaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -610,17 +628,6 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - if (!exclusive) { - /* NOTE: _reserve_shared() must happen before _add_shared_fence(), - * which makes this a slightly strange place to call it. OTOH this - * is a convenient can-fail point to hook it in. (And similar to - * how etnaviv and nouveau handle this.) 
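(Aside on the get_vaddr() change above: the madv comparison leans on the madvise states being ordered. Assuming the values from the uapi header and msm_gem.h - MSM_MADV_WILLNEED = 0, MSM_MADV_DONTNEED = 1, kernel-internal __MSM_MADV_PURGED = 2 - the two wrappers behave as:

	msm_gem_get_vaddr(obj);		/* WILLNEED objects only */
	msm_gem_get_vaddr_active(obj);	/* WILLNEED or DONTNEED, i.e.
					 * anything not yet purged; for
					 * the hang-dump path only */

A purged object fails either way, since its backing pages are already gone.)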
- */ - ret = reservation_object_reserve_shared(msm_obj->resv); - if (ret) - return ret; - } - fobj = reservation_object_get_list(msm_obj->resv); if (!fobj || (fobj->shared_count == 0)) { fence = reservation_object_get_excl(msm_obj->resv); @@ -1045,10 +1052,10 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, } vaddr = msm_gem_get_vaddr(obj); - if (!vaddr) { + if (IS_ERR(vaddr)) { msm_gem_put_iova(obj, aspace); drm_gem_object_unreference(obj); - return ERR_PTR(-ENOMEM); + return ERR_CAST(vaddr); } if (bo) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 91c210d2359c..9320e184b48d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,12 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; + uint32_t seqno; /* Sequence number of the submit on the ring */ struct dma_fence *fence; + struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5d0a75d4b249..b8dc8f96caf2 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -31,7 +31,8 @@ #define BO_PINNED 0x2000 static struct msm_gem_submit *submit_create(struct drm_device *dev, - struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) + struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue, + uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) + @@ -49,6 +50,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->fence = NULL; submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; + submit->queue = queue; + submit->ring = gpu->rb[queue->prio]; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -66,6 +69,8 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); + msm_submitqueue_put(submit->queue); + kfree(submit); } @@ -156,7 +161,8 @@ out: return ret; } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, + int i, bool backoff) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -166,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) if (submit->bos[i].flags & BO_LOCKED) ww_mutex_unlock(&msm_obj->resv->lock); - if (!(submit->bos[i].flags & BO_VALID)) + if (backoff && !(submit->bos[i].flags & BO_VALID)) submit->bos[i].iova = 0; submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); @@ -201,10 +207,10 @@ retry: fail: for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, true); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked); + submit_unlock_unpin_bo(submit, slow_locked, true); if (ret == -EDEADLK) { struct msm_gem_object *msm_obj = submit->bos[contended].obj; @@ -221,7 +227,7 @@ fail: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit) +static int 
submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) { int i, ret = 0; @@ -229,7 +235,22 @@ static int submit_fence_sync(struct msm_gem_submit *submit) struct msm_gem_object *msm_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; - ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + if (!write) { + /* NOTE: _reserve_shared() must happen before + * _add_shared_fence(), which makes this a slightly + * strange place to call it. OTOH this is a + * convenient can-fail point to hook it in. + */ + ret = reservation_object_reserve_shared(msm_obj->resv); + if (ret) + return ret; + } + + if (no_implicit) + continue; + + ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + write); if (ret) break; } @@ -373,7 +394,7 @@ static void submit_cleanup(struct msm_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, false); list_del_init(&msm_obj->submit_entry); drm_gem_object_unreference(&msm_obj->base); } @@ -391,6 +412,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gpu *gpu = priv->gpu; struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; int out_fence_fd = -1; unsigned i; int ret; @@ -407,6 +430,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) return -EINVAL; + queue = msm_submitqueue_get(ctx, args->queueid); + if (!queue) + return -ENOENT; + + ring = gpu->rb[queue->prio]; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -417,7 +446,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * Wait if the fence is from a foreign context, or if the fence * array contains any fence from a foreign context. 
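* (With per-ring fence contexts, "foreign" now also covers a different ring of the same GPU: a fence produced on a prio-1 queue and passed as the in-fence of a prio-0 submit fails dma_fence_match_context() below and is waited on the CPU, since this series does not resolve cross-ring dependencies in hardware.)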
*/ - if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { + if (!dma_fence_match_context(in_fence, ring->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; @@ -435,9 +464,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out_unlock; } } - priv->struct_mutex_task = current; - submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); + submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; goto out_unlock; @@ -451,11 +479,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { - ret = submit_fence_sync(submit); - if (ret) - goto out; - } + ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); + if (ret) + goto out; ret = submit_pin_objects(submit); if (ret) @@ -522,7 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(gpu->fctx); + submit->fence = msm_fence_alloc(ring->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; @@ -555,7 +581,6 @@ out: out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - priv->struct_mutex_task = NULL; mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ffbff27600e0..8d4477818ec2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -20,6 +20,8 @@ #include "msm_mmu.h" #include "msm_fence.h" +#include <linux/string_helpers.h> + /* * Power Management: @@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + +static struct msm_gem_submit * +find_submit(struct msm_ringbuffer *ring, uint32_t fence) +{ + struct msm_gem_submit *submit; + + WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); + + list_for_each_entry(submit, &ring->submits, node) + if (submit->seqno == fence) + return submit; + + return NULL; +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - msm_update_fence(gpu->fctx, fence + 1); + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + int i; mutex_lock(&dev->struct_mutex); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(submit->pid, PIDTYPE_PID); - if (task) { - dev_err(dev->dev, "%s: offending task: %s\n", - gpu->name, task->comm); - } - rcu_read_unlock(); - break; + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + char *cmd; + + /* + * So slightly annoying, in other paths like + * mmap'ing gem buffers, mmap_sem is acquired + * before struct_mutex, which 
means we can't + * hold struct_mutex across the call to + * get_cmdline(). But submits are retired + * from the same in-order workqueue, so we can + * safely drop the lock here without worrying + * about the submit going away. + */ + mutex_unlock(&dev->struct_mutex); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + mutex_lock(&dev->struct_mutex); + + dev_err(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } + rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) { @@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! 
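(that is, ring->memptrs->fence stopped advancing while ring->seqno, the last fence handed out on this ring, is still ahead - the per-ring analogue of the old gpu->fctx->last_fence check)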
*/ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; + /* Retire the commits starting with highest priority */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); - - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; - msm_rd_dump_submit(submit); + list_add_tail(&submit->node, &ring->submits); + + msm_rd_dump_submit(priv->rd, submit, NULL); update_sw_cntrs(gpu); @@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,36 +766,79 @@ int msm_gpu_init(struct drm_device *drm, struct 
platform_device *pdev, goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + &memptrs_iova); + + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + platform_set_drvdata(pdev, NULL); return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; } - if (gpu->aspace) { + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + + if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, NULL, 0); msm_gem_address_space_put(gpu->aspace); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index df4e2771fb85..e113d64574d3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,9 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); - uint32_t (*last_fence)(struct msm_gpu *gpu); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -86,16 +86,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - /* fencing: */ - struct msm_fence_context *fctx; - /* does gpu need hw_init? 
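(Aside on the memptrs allocation in msm_gpu_init() above: a single BO is shared by all rings, each msm_ringbuffer_new() call receiving its memptrs/memptrs_iova slice advanced by sizeof(struct msm_rbmemptrs). Note the size passed is sizeof(*gpu->memptrs_bo), i.e. the size of the gem object struct rather than of the memptrs themselves; that works only because the allocator page-aligns the request. A more self-describing request - an editorial suggestion, not what the patch does - would be:

	memptrs = msm_gem_kernel_new(drm,
			sizeof(struct msm_rbmemptrs) * nr_rings,
			MSM_BO_UNCACHED, gpu->aspace,
			&gpu->memptrs_bo, &memptrs_iova);
)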
*/ bool needs_hw_init; @@ -126,15 +122,31 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; + struct drm_gem_object *memptrs_bo; }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K +#define MSM_GPU_RINGBUFFER_BLKSIZE 32 + +#define MSM_GPU_RB_CNTL_DEFAULT \ + (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \ + AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8))) + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: @@ -150,6 +162,15 @@ struct msm_gpu_perfcntr { const char *name; }; +struct msm_gpu_submitqueue { + int id; + u32 flags; + u32 prio; + int faults; + struct list_head node; + struct kref ref; +}; + static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { msm_writel(data, gpu->mmio + (reg << 2)); @@ -223,4 +244,10 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev); void __init adreno_register(void); void __exit adreno_unregister(void); +static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) +{ + if (queue) + kref_put(&queue->ref, msm_submitqueue_destroy); +} + #endif /* __MSM_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 0366b8092f97..3aa8a8576abe 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -19,11 +19,17 @@ * * tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd * - * To log the cmdstream in a format that is understood by freedreno/cffdump + * to log the cmdstream in a format that is understood by freedreno/cffdump * utility. By comparing the last successfully completed fence #, to the * cmdstream for the next fence, you can narrow down which process and submit * caused the gpu crash/lockup. * + * Additionally: + * + * tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd + * + * will capture just the cmdstream from submits which triggered a GPU hang. + * * This bypasses drm_debugfs_create_files() mainly because we need to use * our own fops for a bit more control. In particular, we don't want to * do anything if userspace doesn't have the debugfs file open. @@ -111,10 +117,14 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + /* Note that smp_load_acquire() is not strictly required + * as CIRC_SPACE_TO_END() does not access the tail more + * than once. + */ n = min(sz, circ_space_to_end(&rd->fifo)); memcpy(fptr, ptr, n); - fifo->head = (fifo->head + n) & (BUF_SZ - 1); + smp_store_release(&fifo->head, (fifo->head + n) & (BUF_SZ - 1)); sz -= n; ptr += n; @@ -145,13 +155,17 @@ static ssize_t rd_read(struct file *file, char __user *buf, if (ret) goto out; + /* Note that smp_load_acquire() is not strictly required + * as CIRC_CNT_TO_END() does not access the head more than + * once. 
+ */ n = min_t(int, sz, circ_count_to_end(&rd->fifo)); if (copy_to_user(buf, fptr, n)) { ret = -EFAULT; goto out; } - fifo->tail = (fifo->tail + n) & (BUF_SZ - 1); + smp_store_release(&fifo->tail, (fifo->tail + n) & (BUF_SZ - 1)); *ppos += n; wake_up_all(&rd->fifo_event); @@ -212,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = { .release = rd_release, }; -int msm_rd_debugfs_init(struct drm_minor *minor) + +static void rd_cleanup(struct msm_rd_state *rd) +{ + if (!rd) + return; + + mutex_destroy(&rd->read_lock); + kfree(rd); +} + +static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name) { - struct msm_drm_private *priv = minor->dev->dev_private; struct msm_rd_state *rd; struct dentry *ent; - - /* only create on first minor: */ - if (priv->rd) - return 0; + int ret = 0; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rd->dev = minor->dev; rd->fifo.buf = rd->buf; mutex_init(&rd->read_lock); - priv->rd = rd; init_waitqueue_head(&rd->fifo_event); - ent = debugfs_create_file("rd", S_IFREG | S_IRUGO, + ent = debugfs_create_file(name, S_IFREG | S_IRUGO, minor->debugfs_root, rd, &rd_debugfs_fops); if (!ent) { - DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n", - minor->debugfs_root); + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n", + minor->debugfs_root, name); + ret = -ENOMEM; + goto fail; + } + + return rd; + +fail: + rd_cleanup(rd); + return ERR_PTR(ret); +} + +int msm_rd_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd; + int ret; + + /* only create on first minor: */ + if (priv->rd) + return 0; + + rd = rd_init(minor, "rd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); goto fail; } + priv->rd = rd; + + rd = rd_init(minor, "hangrd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + priv->hangrd = rd; + return 0; fail: msm_rd_debugfs_cleanup(priv); - return -1; + return ret; } void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) { - struct msm_rd_state *rd = priv->rd; - - if (!rd) - return; - + rd_cleanup(priv->rd); priv->rd = NULL; - mutex_destroy(&rd->read_lock); - kfree(rd); + + rd_cleanup(priv->hangrd); + priv->hangrd = NULL; } static void snapshot_buf(struct msm_rd_state *rd, @@ -268,10 +318,6 @@ static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf; - buf = msm_gem_get_vaddr(&obj->base); - if (IS_ERR(buf)) - return; - if (iova) { buf += iova - submit->bos[idx].iova; } else { @@ -279,20 +325,33 @@ static void snapshot_buf(struct msm_rd_state *rd, size = obj->base.size; } + /* + * Always write the GPUADDR header so can get a complete list of all the + * buffers in the cmd + */ rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + + /* But only dump the contents of buffers marked READ */ + if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) + return; + + buf = msm_gem_get_vaddr_active(&obj->base); + if (IS_ERR(buf)) + return; + rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size); msm_gem_put_vaddr(&obj->base); } /* called under struct_mutex */ -void msm_rd_dump_submit(struct msm_gem_submit *submit) +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...) 
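(Aside on the fifo barriers added above, following Documentation/circular-buffers.txt: each side publishes the index it owns with a release store, so the bytes written become visible before the index moves. A minimal single-producer/single-consumer pairing:

	/* producer */
	fifo->buf[head] = byte;
	smp_store_release(&fifo->head, (head + 1) & (BUF_SZ - 1));

	/* consumer */
	head = smp_load_acquire(&fifo->head);
	if (CIRC_CNT(head, tail, BUF_SZ)) {
		byte = fifo->buf[tail];
		smp_store_release(&fifo->tail, (tail + 1) & (BUF_SZ - 1));
	}

rd_write() and rd_read() get away with a plain load of the remote index because, as the added comments note, CIRC_SPACE_TO_END() and CIRC_CNT_TO_END() read it only once.)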
{ struct drm_device *dev = submit->dev; - struct msm_drm_private *priv = dev->dev_private; - struct msm_rd_state *rd = priv->rd; - char msg[128]; + struct task_struct *task; + char msg[256]; int i, n; if (!rd->open) @@ -303,23 +362,32 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) */ WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - submit->fence->seqno); + if (fmt) { + va_list args; - rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + va_start(args, fmt); + n = vsnprintf(msg, sizeof(msg), fmt, args); + va_end(args); - if (rd_full) { - for (i = 0; i < submit->nr_bos; i++) { - /* buffers that are written to probably don't start out - * with anything interesting: - */ - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - continue; + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + } - snapshot_buf(rd, submit, i, 0, 0); - } + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", + TASK_COMM_LEN, task->comm, + pid_nr(submit->pid), submit->seqno); + } else { + n = snprintf(msg, sizeof(msg), "???/%d: fence=%u", + pid_nr(submit->pid), submit->seqno); } + rcu_read_unlock(); + + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + + for (i = 0; rd_full && i < submit->nr_bos; i++) + snapshot_buf(rd, submit, i, 0, 0); for (i = 0; i < submit->nr_cmds; i++) { uint64_t iova = submit->cmd[i].iova; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a540130..6ca98da35f63 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,15 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + char name[32]; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +35,46 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); + ring->next = ring->start; ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); + spin_lock_init(&ring->lock); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + + ring->fctx = msm_fence_context_alloc(gpu->dev, name); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + + msm_fence_context_free(ring->fctx); + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo); 
drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..cffce094aecb 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,31 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; - uint32_t *start, *end, *cur; + uint32_t *start, *end, *cur, *next; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; + spinlock_t lock; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ @@ -35,9 +52,13 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); static inline void OUT_RING(struct msm_ringbuffer *ring, uint32_t data) { - if (ring->cur == ring->end) - ring->cur = ring->start; - *(ring->cur++) = data; + /* + * ring->next points to the current command being written - it won't be + * committed as ring->cur until the flush + */ + if (ring->next == ring->end) + ring->next = ring->start; + *(ring->next++) = data; } #endif /* __MSM_RINGBUFFER_H__ */ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c new file mode 100644 index 000000000000..5115f75b5b7f --- /dev/null +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
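(Aside on the OUT_RING() change above: command words are staged at ring->next and become visible to the GPU only when a flush publishes them. The flush itself lives on the adreno side of this series and is not shown in this hunk, so treat the names below as assumed; it does roughly:

	spin_lock_irqsave(&ring->lock, flags);
	ring->cur = ring->next;		/* commit the staged words */
	wptr = get_wptr(ring);
	spin_unlock_irqrestore(&ring->lock, flags);

	gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);

so a submit that is still being built never exposes a half-written packet to the hardware.)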
+ * + */ + +#include <linux/kref.h> +#include "msm_gpu.h" + +void msm_submitqueue_destroy(struct kref *kref) +{ + struct msm_gpu_submitqueue *queue = container_of(kref, + struct msm_gpu_submitqueue, ref); + + kfree(queue); +} + +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return NULL; + + read_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + kref_get(&entry->ref); + read_unlock(&ctx->queuelock); + + return entry; + } + } + + read_unlock(&ctx->queuelock); + return NULL; +} + +void msm_submitqueue_close(struct msm_file_private *ctx) +{ + struct msm_gpu_submitqueue *entry, *tmp; + + if (!ctx) + return; + + /* + * No lock needed in close and there won't + * be any more user ioctls coming our way + */ + list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node) + msm_submitqueue_put(entry); +} + +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id) +{ + struct msm_drm_private *priv = drm->dev_private; + struct msm_gpu_submitqueue *queue; + + if (!ctx) + return -ENODEV; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + + if (!queue) + return -ENOMEM; + + kref_init(&queue->ref); + queue->flags = flags; + + if (priv->gpu) { + if (prio >= priv->gpu->nr_rings) { + kfree(queue); + return -EINVAL; + } + + queue->prio = prio; + } + + write_lock(&ctx->queuelock); + + queue->id = ctx->queueid++; + + if (id) + *id = queue->id; + + list_add_tail(&queue->node, &ctx->submitqueues); + + write_unlock(&ctx->queuelock); + + return 0; +} + +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) +{ + struct msm_drm_private *priv = drm->dev_private; + int default_prio; + + if (!ctx) + return 0; + + /* + * Select priority 2 as the "default priority" unless nr_rings is less + * than 2 and then pick the lowest priority + */ + default_prio = priv->gpu ? 
+ clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + + INIT_LIST_HEAD(&ctx->submitqueues); + + rwlock_init(&ctx->queuelock); + + return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); +} + +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return 0; + + /* + * id 0 is the "default" queue and can't be destroyed + * by the user + */ + if (!id) + return -ENOENT; + + write_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + list_del(&entry->node); + write_unlock(&ctx->queuelock); + + msm_submitqueue_put(entry); + return 0; + } + } + + write_unlock(&ctx->queuelock); + return -ENOENT; +} + diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 2e9ce53ae3a8..9c0c650655e9 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -30,9 +30,11 @@ nouveau-y += nouveau_vga.o # DRM - memory management nouveau-y += nouveau_bo.o nouveau-y += nouveau_gem.o +nouveau-y += nouveau_mem.o nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o nouveau-y += nouveau_ttm.o +nouveau-y += nouveau_vmm.o # DRM - modesetting nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index c02a13406a81..4b75ad40dd80 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -56,6 +56,13 @@ config NOUVEAU_DEBUG_DEFAULT help Selects the default debug level +config NOUVEAU_DEBUG_MMU + bool "Enable additional MMU debugging" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to enable verbose MMU debug output. + config DRM_NOUVEAU_BACKLIGHT bool "Support for backlight control" depends on DRM_NOUVEAU diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 5b9d549aa791..501d2d290e9c 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -48,7 +48,7 @@ nv04_display_create(struct drm_device *dev) if (!disp) return -ENOMEM; - nvif_object_map(&drm->client.device.object); + nvif_object_map(&drm->client.device.object, NULL, 0); nouveau_display(dev)->priv = disp; nouveau_display(dev)->dtor = nv04_display_destroy; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h b/drivers/gpu/drm/nouveau/include/nvif/cl506e.h index aa94b8cf9679..f50866011002 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl506e.h @@ -5,7 +5,7 @@ struct nv50_channel_dma_v0 { __u8 version; __u8 chid; __u8 pad02[6]; - __u64 vm; + __u64 vmm; __u64 pushbuf; __u64 offset; }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h b/drivers/gpu/drm/nouveau/include/nvif/cl506f.h index 3b7101966de4..0e5bbb553158 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl506f.h @@ -8,6 +8,6 @@ struct nv50_channel_gpfifo_v0 { __u32 ilength; __u64 ioffset; __u64 pushbuf; - __u64 vm; + __u64 vmm; }; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h index 91e33db21a2f..7f6a8ce5a418 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h @@ -5,7 +5,7 @@ struct g82_channel_dma_v0 { __u8 version; __u8 chid; __u8 pad02[6]; - __u64 vm; + __u64 vmm; __u64 pushbuf; __u64 offset; }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h 
b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h index e34efd4ec537..c4d35522331a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h @@ -8,7 +8,7 @@ struct g82_channel_gpfifo_v0 { __u32 ilength; __u64 ioffset; __u64 pushbuf; - __u64 vm; + __u64 vmm; }; #define NV826F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h index a2d5410a491b..169161c1587f 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h @@ -7,7 +7,7 @@ struct fermi_channel_gpfifo_v0 { __u8 pad02[2]; __u32 ilength; __u64 ioffset; - __u64 vm; + __u64 vmm; }; #define NV906F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h index 2efa3d048bb9..3e57089526e3 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h @@ -22,7 +22,7 @@ struct kepler_channel_gpfifo_a_v0 { __u32 engines; __u32 ilength; __u64 ioffset; - __u64 vm; + __u64 vmm; }; #define NVA06F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index d08da82ba7ed..56aade45067d 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -14,6 +14,23 @@ #define NVIF_CLASS_SW_NV50 /* if0005.h */ -0x00000006 #define NVIF_CLASS_SW_GF100 /* if0005.h */ -0x00000007 +#define NVIF_CLASS_MMU /* if0008.h */ 0x80000008 +#define NVIF_CLASS_MMU_NV04 /* if0008.h */ 0x80000009 +#define NVIF_CLASS_MMU_NV50 /* if0008.h */ 0x80005009 +#define NVIF_CLASS_MMU_GF100 /* if0008.h */ 0x80009009 + +#define NVIF_CLASS_MEM /* if000a.h */ 0x8000000a +#define NVIF_CLASS_MEM_NV04 /* if000b.h */ 0x8000000b +#define NVIF_CLASS_MEM_NV50 /* if500b.h */ 0x8000500b +#define NVIF_CLASS_MEM_GF100 /* if900b.h */ 0x8000900b + +#define NVIF_CLASS_VMM /* if000c.h */ 0x8000000c +#define NVIF_CLASS_VMM_NV04 /* if000d.h */ 0x8000000d +#define NVIF_CLASS_VMM_NV50 /* if500d.h */ 0x8000500d +#define NVIF_CLASS_VMM_GF100 /* if900d.h */ 0x8000900d +#define NVIF_CLASS_VMM_GM200 /* ifb00d.h */ 0x8000b00d +#define NVIF_CLASS_VMM_GP100 /* ifc00d.h */ 0x8000c00d + /* the below match nvidia-assigned (either in hw, or sw) class numbers */ #define NV_NULL_CLASS 0x00000030 diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h index bcb981711617..b579633b80c0 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/device.h +++ b/drivers/gpu/drm/nouveau/include/nvif/device.h @@ -38,7 +38,6 @@ u64 nvif_device_time(struct nvif_device *); /*XXX*/ #include <subdev/bios.h> #include <subdev/fb.h> -#include <subdev/mmu.h> #include <subdev/bar.h> #include <subdev/gpio.h> #include <subdev/clk.h> @@ -57,8 +56,6 @@ u64 nvif_device_time(struct nvif_device *); }) #define nvxx_bios(a) nvxx_device(a)->bios #define nvxx_fb(a) nvxx_device(a)->fb -#define nvxx_mmu(a) nvxx_device(a)->mmu -#define nvxx_bar(a) nvxx_device(a)->bar #define nvxx_gpio(a) nvxx_device(a)->gpio #define nvxx_clk(a) nvxx_device(a)->clk #define nvxx_i2c(a) nvxx_device(a)->i2c @@ -66,10 +63,8 @@ u64 nvif_device_time(struct nvif_device *); #define nvxx_therm(a) nvxx_device(a)->therm #define nvxx_volt(a) nvxx_device(a)->volt -#include <core/device.h> #include <engine/fifo.h> #include <engine/gr.h> -#include <engine/sw.h> #define nvxx_fifo(a) 
nvxx_device(a)->fifo #define nvxx_gr(a) nvxx_device(a)->gr diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0008.h b/drivers/gpu/drm/nouveau/include/nvif/if0008.h new file mode 100644 index 000000000000..8450127420f5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if0008.h @@ -0,0 +1,42 @@ +#ifndef __NVIF_IF0008_H__ +#define __NVIF_IF0008_H__ +struct nvif_mmu_v0 { + __u8 version; + __u8 dmabits; + __u8 heap_nr; + __u8 type_nr; + __u16 kind_nr; +}; + +#define NVIF_MMU_V0_HEAP 0x00 +#define NVIF_MMU_V0_TYPE 0x01 +#define NVIF_MMU_V0_KIND 0x02 + +struct nvif_mmu_heap_v0 { + __u8 version; + __u8 index; + __u8 pad02[6]; + __u64 size; +}; + +struct nvif_mmu_type_v0 { + __u8 version; + __u8 index; + __u8 heap; + __u8 vram; + __u8 host; + __u8 comp; + __u8 disp; + __u8 kind; + __u8 mappable; + __u8 coherent; + __u8 uncached; +}; + +struct nvif_mmu_kind_v0 { + __u8 version; + __u8 pad01[1]; + __u16 count; + __u8 data[]; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000a.h b/drivers/gpu/drm/nouveau/include/nvif/if000a.h new file mode 100644 index 000000000000..88d0938fbd5a --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000a.h @@ -0,0 +1,22 @@ +#ifndef __NVIF_IF000A_H__ +#define __NVIF_IF000A_H__ +struct nvif_mem_v0 { + __u8 version; + __u8 type; + __u8 page; + __u8 pad03[5]; + __u64 size; + __u64 addr; + __u8 data[]; +}; + +struct nvif_mem_ram_vn { +}; + +struct nvif_mem_ram_v0 { + __u8 version; + __u8 pad01[7]; + dma_addr_t *dma; + struct scatterlist *sgl; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000b.h b/drivers/gpu/drm/nouveau/include/nvif/if000b.h new file mode 100644 index 000000000000..c677fb0293da --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000b.h @@ -0,0 +1,11 @@ +#ifndef __NVIF_IF000B_H__ +#define __NVIF_IF000B_H__ +#include "if000a.h" + +struct nv04_mem_vn { + /* nvkm_mem_vX ... 
*/ +}; + +struct nv04_mem_map_vn { +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h new file mode 100644 index 000000000000..2928ecd989ad --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -0,0 +1,64 @@ +#ifndef __NVIF_IF000C_H__ +#define __NVIF_IF000C_H__ +struct nvif_vmm_v0 { + __u8 version; + __u8 page_nr; + __u8 pad02[6]; + __u64 addr; + __u64 size; + __u8 data[]; +}; + +#define NVIF_VMM_V0_PAGE 0x00 +#define NVIF_VMM_V0_GET 0x01 +#define NVIF_VMM_V0_PUT 0x02 +#define NVIF_VMM_V0_MAP 0x03 +#define NVIF_VMM_V0_UNMAP 0x04 + +struct nvif_vmm_page_v0 { + __u8 version; + __u8 index; + __u8 shift; + __u8 sparse; + __u8 vram; + __u8 host; + __u8 comp; + __u8 pad07[1]; +}; + +struct nvif_vmm_get_v0 { + __u8 version; +#define NVIF_VMM_GET_V0_ADDR 0x00 +#define NVIF_VMM_GET_V0_PTES 0x01 +#define NVIF_VMM_GET_V0_LAZY 0x02 + __u8 type; + __u8 sparse; + __u8 page; + __u8 align; + __u8 pad05[3]; + __u64 size; + __u64 addr; +}; + +struct nvif_vmm_put_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; +}; + +struct nvif_vmm_map_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; + __u64 size; + __u64 memory; + __u64 offset; + __u8 data[]; +}; + +struct nvif_vmm_unmap_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000d.h b/drivers/gpu/drm/nouveau/include/nvif/if000d.h new file mode 100644 index 000000000000..516ec9401401 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000d.h @@ -0,0 +1,12 @@ +#ifndef __NVIF_IF000D_H__ +#define __NVIF_IF000D_H__ +#include "if000c.h" + +struct nv04_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct nv04_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if500b.h b/drivers/gpu/drm/nouveau/include/nvif/if500b.h new file mode 100644 index 000000000000..c7c8431fb2ce --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if500b.h @@ -0,0 +1,25 @@ +#ifndef __NVIF_IF500B_H__ +#define __NVIF_IF500B_H__ +#include "if000a.h" + +struct nv50_mem_vn { + /* nvif_mem_vX ... */ +}; + +struct nv50_mem_v0 { + /* nvif_mem_vX ... */ + __u8 version; + __u8 bankswz; + __u8 contig; +}; + +struct nv50_mem_map_vn { +}; + +struct nv50_mem_map_v0 { + __u8 version; + __u8 ro; + __u8 kind; + __u8 comp; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if500d.h b/drivers/gpu/drm/nouveau/include/nvif/if500d.h new file mode 100644 index 000000000000..c29a7822b363 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if500d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IF500D_H__ +#define __NVIF_IF500D_H__ +#include "if000c.h" + +struct nv50_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct nv50_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct nv50_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 ro; + __u8 priv; + __u8 kind; + __u8 comp; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if900b.h b/drivers/gpu/drm/nouveau/include/nvif/if900b.h new file mode 100644 index 000000000000..9b164548eea8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if900b.h @@ -0,0 +1,23 @@ +#ifndef __NVIF_IF900B_H__ +#define __NVIF_IF900B_H__ +#include "if000a.h" + +struct gf100_mem_vn { + /* nvif_mem_vX ... */ +}; + +struct gf100_mem_v0 { + /* nvif_mem_vX ... 
*/ + __u8 version; + __u8 contig; +}; + +struct gf100_mem_map_vn { +}; + +struct gf100_mem_map_v0 { + __u8 version; + __u8 ro; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if900d.h b/drivers/gpu/drm/nouveau/include/nvif/if900d.h new file mode 100644 index 000000000000..49aa50583c3d --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if900d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IF900D_H__ +#define __NVIF_IF900D_H__ +#include "if000c.h" + +struct gf100_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gf100_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gf100_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h new file mode 100644 index 000000000000..a0e419830595 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h @@ -0,0 +1,27 @@ +#ifndef __NVIF_IFB00D_H__ +#define __NVIF_IFB00D_H__ +#include "if000c.h" + +struct gm200_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gm200_vmm_v0 { + /* nvif_vmm_vX ... */ + __u8 version; + __u8 bigpage; +}; + +struct gm200_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gm200_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h new file mode 100644 index 000000000000..1d9c637859f3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IFC00D_H__ +#define __NVIF_IFC00D_H__ +#include "if000c.h" + +struct gp100_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gp100_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gp100_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h index c5f5eb83a594..1886366457f1 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h +++ b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h @@ -1,7 +1,7 @@ #ifndef __NVIF_IOCTL_H__ #define __NVIF_IOCTL_H__ -#define NVIF_VERSION_LATEST 0x0000000000000000ULL +#define NVIF_VERSION_LATEST 0x0000000000000100ULL struct nvif_ioctl_v0 { __u8 version; @@ -83,9 +83,13 @@ struct nvif_ioctl_wr_v0 { struct nvif_ioctl_map_v0 { /* nvif_ioctl ... 
*/ __u8 version; - __u8 pad01[3]; - __u32 length; +#define NVIF_IOCTL_MAP_V0_IO 0x00 +#define NVIF_IOCTL_MAP_V0_VA 0x01 + __u8 type; + __u8 pad02[6]; __u64 handle; + __u64 length; + __u8 data[]; }; struct nvif_ioctl_unmap { diff --git a/drivers/gpu/drm/nouveau/include/nvif/mem.h b/drivers/gpu/drm/nouveau/include/nvif/mem.h new file mode 100644 index 000000000000..b542fe38398e --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/mem.h @@ -0,0 +1,18 @@ +#ifndef __NVIF_MEM_H__ +#define __NVIF_MEM_H__ +#include "mmu.h" + +struct nvif_mem { + struct nvif_object object; + u8 type; + u8 page; + u64 addr; + u64 size; +}; + +int nvif_mem_init_type(struct nvif_mmu *mmu, s32 oclass, int type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *); +int nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *); +void nvif_mem_fini(struct nvif_mem *); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/mmu.h b/drivers/gpu/drm/nouveau/include/nvif/mmu.h new file mode 100644 index 000000000000..c8cd5b5b0688 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/mmu.h @@ -0,0 +1,56 @@ +#ifndef __NVIF_MMU_H__ +#define __NVIF_MMU_H__ +#include <nvif/object.h> + +struct nvif_mmu { + struct nvif_object object; + u8 dmabits; + u8 heap_nr; + u8 type_nr; + u16 kind_nr; + + struct { + u64 size; + } *heap; + + struct { +#define NVIF_MEM_VRAM 0x01 +#define NVIF_MEM_HOST 0x02 +#define NVIF_MEM_COMP 0x04 +#define NVIF_MEM_DISP 0x08 +#define NVIF_MEM_KIND 0x10 +#define NVIF_MEM_MAPPABLE 0x20 +#define NVIF_MEM_COHERENT 0x40 +#define NVIF_MEM_UNCACHED 0x80 + u8 type; + u8 heap; + } *type; + + u8 *kind; +}; + +int nvif_mmu_init(struct nvif_object *, s32 oclass, struct nvif_mmu *); +void nvif_mmu_fini(struct nvif_mmu *); + +static inline bool +nvif_mmu_kind_valid(struct nvif_mmu *mmu, u8 kind) +{ + const u8 invalid = mmu->kind_nr - 1; + if (kind) { + if (kind >= mmu->kind_nr || mmu->kind[kind] == invalid) + return false; + } + return true; +} + +static inline int +nvif_mmu_type(struct nvif_mmu *mmu, u8 mask) +{ + int i; + for (i = 0; i < mmu->type_nr; i++) { + if ((mmu->type[i].type & mask) == mask) + return i; + } + return -EINVAL; +} +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/object.h b/drivers/gpu/drm/nouveau/include/nvif/object.h index 9e58b305b020..0b54261bdefe 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/object.h +++ b/drivers/gpu/drm/nouveau/include/nvif/object.h @@ -16,7 +16,7 @@ struct nvif_object { void *priv; /*XXX: hack */ struct { void __iomem *ptr; - u32 size; + u64 size; } map; }; @@ -29,7 +29,10 @@ void nvif_object_sclass_put(struct nvif_sclass **); u32 nvif_object_rd(struct nvif_object *, int, u64); void nvif_object_wr(struct nvif_object *, int, u64, u32); int nvif_object_mthd(struct nvif_object *, u32, void *, u32); -int nvif_object_map(struct nvif_object *); +int nvif_object_map_handle(struct nvif_object *, void *, u32, + u64 *handle, u64 *length); +void nvif_object_unmap_handle(struct nvif_object *); +int nvif_object_map(struct nvif_object *, void *, u32); void nvif_object_unmap(struct nvif_object *); #define nvif_handle(a) (unsigned long)(void *)(a) diff --git a/drivers/gpu/drm/nouveau/include/nvif/os.h b/drivers/gpu/drm/nouveau/include/nvif/os.h index 9fcab67c8557..5efdf80d5abc 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/os.h +++ b/drivers/gpu/drm/nouveau/include/nvif/os.h @@ -33,18 +33,4 @@ #include <soc/tegra/fuse.h> #include <soc/tegra/pmc.h> - -#ifndef ioread32_native -#ifdef __BIG_ENDIAN 
-#define ioread16_native ioread16be -#define iowrite16_native iowrite16be -#define ioread32_native ioread32be -#define iowrite32_native iowrite32be -#else /* def __BIG_ENDIAN */ -#define ioread16_native ioread16 -#define iowrite16_native iowrite16 -#define ioread32_native ioread32 -#define iowrite32_native iowrite32 -#endif /* def __BIG_ENDIAN else */ -#endif /* !ioread32_native */ #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h new file mode 100644 index 000000000000..c5db8a2e82df --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -0,0 +1,42 @@ +#ifndef __NVIF_VMM_H__ +#define __NVIF_VMM_H__ +#include <nvif/object.h> +struct nvif_mem; +struct nvif_mmu; + +enum nvif_vmm_get { + ADDR, + PTES, + LAZY +}; + +struct nvif_vma { + u64 addr; + u64 size; +}; + +struct nvif_vmm { + struct nvif_object object; + u64 start; + u64 limit; + + struct { + u8 shift; + bool sparse:1; + bool vram:1; + bool host:1; + bool comp:1; + } *page; + int page_nr; +}; + +int nvif_vmm_init(struct nvif_mmu *, s32 oclass, u64 addr, u64 size, + void *argv, u32 argc, struct nvif_vmm *); +void nvif_vmm_fini(struct nvif_vmm *); +int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, + u8 page, u8 align, u64 size, struct nvif_vma *); +void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *); +int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_mem *, u64 offset); +int nvif_vmm_unmap(struct nvif_vmm *, u64); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h index e876634da10a..79624f6d0a2b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h @@ -16,7 +16,8 @@ struct nvkm_client { void *data; int (*ntfy)(const void *, u32, const void *, u32); - struct nvkm_vm *vm; + struct list_head umem; + spinlock_t lock; }; int nvkm_client_new(const char *name, u64 device, const char *cfg, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index bb4c214f1046..5046e1db99ac 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -1,7 +1,7 @@ #ifndef __NVKM_DEVICE_H__ #define __NVKM_DEVICE_H__ +#include <core/oclass.h> #include <core/event.h> -#include <core/object.h> enum nvkm_devidx { NVKM_SUBDEV_PCI, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h index d4cd2fbfde88..7730499bfd95 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h @@ -15,6 +15,7 @@ struct nvkm_engine { struct nvkm_engine_func { void *(*dtor)(struct nvkm_engine *); + void (*preinit)(struct nvkm_engine *); int (*oneinit)(struct nvkm_engine *); int (*init)(struct nvkm_engine *); int (*fini)(struct nvkm_engine *, bool suspend); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h index c23da4f05929..51691667b813 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h @@ -1,17 +1,16 @@ #ifndef __NVKM_GPUOBJ_H__ #define __NVKM_GPUOBJ_H__ -#include <core/object.h> #include <core/memory.h> #include <core/mm.h> -struct nvkm_vma; -struct nvkm_vm; #define NVOBJ_FLAG_ZERO_ALLOC 0x00000001 #define NVOBJ_FLAG_HEAP 0x00000004 
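[Editor's aside: a minimal, hypothetical sketch of how the new NVIF interfaces introduced above (nvif/mmu.h, nvif/mem.h, nvif/vmm.h) compose. It uses only the signatures shown in this patch; the function name, the choice of NVIF_CLASS_MEM_GF100, the 4KiB page, and the NULL map arguments (real callers would pass a per-family struct such as gf100_vmm_map_v0) are illustrative assumptions, not part of the patch.]

static int
example_map_host_buffer(struct nvif_mmu *mmu, struct nvif_vmm *vmm, u64 size)
{
	struct nvif_mem mem;
	struct nvif_vma vma;
	int type, ret;

	/* Pick a host-visible, CPU-coherent memory type from the
	 * types the MMU object exposes.
	 */
	type = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_COHERENT);
	if (type < 0)
		return type; /* -EINVAL: no such type */

	/* 'page' is assumed to be a log2 page size; 12 requests 4KiB. */
	ret = nvif_mem_init_type(mmu, NVIF_CLASS_MEM_GF100, type, 12, size,
				 NULL, 0, &mem);
	if (ret)
		return ret;

	/* Allocate virtual address space; LAZY is one of the three
	 * request types defined in nvif/vmm.h above, then back it
	 * with the memory object.
	 */
	ret = nvif_vmm_get(vmm, LAZY, false, mem.page, 0, mem.size, &vma);
	if (ret == 0) {
		ret = nvif_vmm_map(vmm, vma.addr, vma.size, NULL, 0, &mem, 0);
		if (ret)
			nvif_vmm_put(vmm, &vma);
	}
	if (ret)
		nvif_mem_fini(&mem);
	/* On success the caller owns mem/vma and unwinds them in
	 * the reverse order: unmap, put, fini.
	 */
	return ret;
}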
struct nvkm_gpuobj { - struct nvkm_object object; - const struct nvkm_gpuobj_func *func; + union { + const struct nvkm_gpuobj_func *func; + const struct nvkm_gpuobj_func *ptrs; + }; struct nvkm_gpuobj *parent; struct nvkm_memory *memory; struct nvkm_mm_node *node; @@ -28,15 +27,14 @@ struct nvkm_gpuobj_func { void (*release)(struct nvkm_gpuobj *); u32 (*rd32)(struct nvkm_gpuobj *, u32 offset); void (*wr32)(struct nvkm_gpuobj *, u32 offset, u32 data); + int (*map)(struct nvkm_gpuobj *, u64 offset, struct nvkm_vmm *, + struct nvkm_vma *, void *argv, u32 argc); }; int nvkm_gpuobj_new(struct nvkm_device *, u32 size, int align, bool zero, struct nvkm_gpuobj *parent, struct nvkm_gpuobj **); void nvkm_gpuobj_del(struct nvkm_gpuobj **); int nvkm_gpuobj_wrap(struct nvkm_memory *, struct nvkm_gpuobj **); -int nvkm_gpuobj_map(struct nvkm_gpuobj *, struct nvkm_vm *, u32 access, - struct nvkm_vma *); -void nvkm_gpuobj_unmap(struct nvkm_vma *); void nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src, u32 length); void nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h index 33ca6769266a..13ebf4da2b96 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h @@ -3,7 +3,12 @@ #include <core/os.h> struct nvkm_device; struct nvkm_vma; -struct nvkm_vm; +struct nvkm_vmm; + +struct nvkm_tags { + struct nvkm_mm_node *mn; + refcount_t refcount; +}; enum nvkm_memory_target { NVKM_MEM_TARGET_INST, /* instance memory */ @@ -14,41 +19,84 @@ enum nvkm_memory_target { struct nvkm_memory { const struct nvkm_memory_func *func; + const struct nvkm_memory_ptrs *ptrs; + struct kref kref; + struct nvkm_tags *tags; }; struct nvkm_memory_func { void *(*dtor)(struct nvkm_memory *); enum nvkm_memory_target (*target)(struct nvkm_memory *); + u8 (*page)(struct nvkm_memory *); u64 (*addr)(struct nvkm_memory *); u64 (*size)(struct nvkm_memory *); - void (*boot)(struct nvkm_memory *, struct nvkm_vm *); + void (*boot)(struct nvkm_memory *, struct nvkm_vmm *); void __iomem *(*acquire)(struct nvkm_memory *); void (*release)(struct nvkm_memory *); + int (*map)(struct nvkm_memory *, u64 offset, struct nvkm_vmm *, + struct nvkm_vma *, void *argv, u32 argc); +}; + +struct nvkm_memory_ptrs { u32 (*rd32)(struct nvkm_memory *, u64 offset); void (*wr32)(struct nvkm_memory *, u64 offset, u32 data); - void (*map)(struct nvkm_memory *, struct nvkm_vma *, u64 offset); }; void nvkm_memory_ctor(const struct nvkm_memory_func *, struct nvkm_memory *); int nvkm_memory_new(struct nvkm_device *, enum nvkm_memory_target, u64 size, u32 align, bool zero, struct nvkm_memory **); -void nvkm_memory_del(struct nvkm_memory **); +struct nvkm_memory *nvkm_memory_ref(struct nvkm_memory *); +void nvkm_memory_unref(struct nvkm_memory **); +int nvkm_memory_tags_get(struct nvkm_memory *, struct nvkm_device *, u32 tags, + void (*clear)(struct nvkm_device *, u32, u32), + struct nvkm_tags **); +void nvkm_memory_tags_put(struct nvkm_memory *, struct nvkm_device *, + struct nvkm_tags **); + #define nvkm_memory_target(p) (p)->func->target(p) +#define nvkm_memory_page(p) (p)->func->page(p) #define nvkm_memory_addr(p) (p)->func->addr(p) #define nvkm_memory_size(p) (p)->func->size(p) #define nvkm_memory_boot(p,v) (p)->func->boot((p),(v)) -#define nvkm_memory_map(p,v,o) (p)->func->map((p),(v),(o)) +#define nvkm_memory_map(p,o,vm,va,av,ac) \ + 
(p)->func->map((p),(o),(vm),(va),(av),(ac)) /* accessor macros - kmap()/done() must bracket use of the other accessor * macros to guarantee correct behaviour across all chipsets */ #define nvkm_kmap(o) (o)->func->acquire(o) -#define nvkm_ro32(o,a) (o)->func->rd32((o), (a)) -#define nvkm_wo32(o,a,d) (o)->func->wr32((o), (a), (d)) +#define nvkm_done(o) (o)->func->release(o) + +#define nvkm_ro32(o,a) (o)->ptrs->rd32((o), (a)) +#define nvkm_wo32(o,a,d) (o)->ptrs->wr32((o), (a), (d)) #define nvkm_mo32(o,a,m,d) ({ \ u32 _addr = (a), _data = nvkm_ro32((o), _addr); \ nvkm_wo32((o), _addr, (_data & ~(m)) | (d)); \ _data; \ }) -#define nvkm_done(o) (o)->func->release(o) + +#define nvkm_wo64(o,a,d) do { \ + u64 __a = (a), __d = (d); \ + nvkm_wo32((o), __a + 0, lower_32_bits(__d)); \ + nvkm_wo32((o), __a + 4, upper_32_bits(__d)); \ +} while(0) + +#define nvkm_fill(t,s,o,a,d,c) do { \ + u64 _a = (a), _c = (c), _d = (d), _o = _a >> s, _s = _c << s; \ + u##t __iomem *_m = nvkm_kmap(o); \ + if (likely(_m)) { \ + if (_d) { \ + while (_c--) \ + iowrite##t##_native(_d, &_m[_o++]); \ + } else { \ + memset_io(&_m[_o], _d, _s); \ + } \ + } else { \ + for (; _c; _c--, _a += BIT(s)) \ + nvkm_wo##t((o), _a, _d); \ + } \ + nvkm_done(o); \ +} while(0) +#define nvkm_fo32(o,a,d,c) nvkm_fill(32, 2, (o), (a), (d), (c)) +#define nvkm_fo64(o,a,d,c) nvkm_fill(64, 3, (o), (a), (d), (c)) #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h index 7bd4897a8a2a..5c1261351138 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h @@ -30,7 +30,7 @@ nvkm_mm_initialised(struct nvkm_mm *mm) return mm->heap_nodes; } -int nvkm_mm_init(struct nvkm_mm *, u32 offset, u32 length, u32 block); +int nvkm_mm_init(struct nvkm_mm *, u8 heap, u32 offset, u32 length, u32 block); int nvkm_mm_fini(struct nvkm_mm *); int nvkm_mm_head(struct nvkm_mm *, u8 heap, u8 type, u32 size_max, u32 size_min, u32 align, struct nvkm_mm_node **); @@ -39,9 +39,39 @@ int nvkm_mm_tail(struct nvkm_mm *, u8 heap, u8 type, u32 size_max, void nvkm_mm_free(struct nvkm_mm *, struct nvkm_mm_node **); void nvkm_mm_dump(struct nvkm_mm *, const char *); +static inline u32 +nvkm_mm_heap_size(struct nvkm_mm *mm, u8 heap) +{ + struct nvkm_mm_node *node; + u32 size = 0; + list_for_each_entry(node, &mm->nodes, nl_entry) { + if (node->heap == heap) + size += node->length; + } + return size; +} + static inline bool nvkm_mm_contiguous(struct nvkm_mm_node *node) { return !node->next; } + +static inline u32 +nvkm_mm_addr(struct nvkm_mm_node *node) +{ + if (WARN_ON(!nvkm_mm_contiguous(node))) + return 0; + return node->offset; +} + +static inline u32 +nvkm_mm_size(struct nvkm_mm_node *node) +{ + u32 size = 0; + do { + size += node->length; + } while ((node = node->next)); + return size; +} #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h index 96dda350ada3..916a4b76d430 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h @@ -1,10 +1,8 @@ #ifndef __NVKM_OBJECT_H__ #define __NVKM_OBJECT_H__ -#include <core/os.h> -#include <core/debug.h> +#include <core/oclass.h> struct nvkm_event; struct nvkm_gpuobj; -struct nvkm_oclass; struct nvkm_object { const struct nvkm_object_func *func; @@ -21,13 +19,20 @@ struct nvkm_object { struct rb_node node; }; +enum nvkm_object_map { + NVKM_OBJECT_MAP_IO, + NVKM_OBJECT_MAP_VA +}; + struct 
nvkm_object_func { void *(*dtor)(struct nvkm_object *); int (*init)(struct nvkm_object *); int (*fini)(struct nvkm_object *, bool suspend); int (*mthd)(struct nvkm_object *, u32 mthd, void *data, u32 size); int (*ntfy)(struct nvkm_object *, u32 mthd, struct nvkm_event **); - int (*map)(struct nvkm_object *, u64 *addr, u32 *size); + int (*map)(struct nvkm_object *, void *argv, u32 argc, + enum nvkm_object_map *, u64 *addr, u64 *size); + int (*unmap)(struct nvkm_object *); int (*rd08)(struct nvkm_object *, u64 addr, u8 *data); int (*rd16)(struct nvkm_object *, u64 addr, u16 *data); int (*rd32)(struct nvkm_object *, u64 addr, u32 *data); @@ -52,7 +57,9 @@ int nvkm_object_init(struct nvkm_object *); int nvkm_object_fini(struct nvkm_object *, bool suspend); int nvkm_object_mthd(struct nvkm_object *, u32 mthd, void *data, u32 size); int nvkm_object_ntfy(struct nvkm_object *, u32 mthd, struct nvkm_event **); -int nvkm_object_map(struct nvkm_object *, u64 *addr, u32 *size); +int nvkm_object_map(struct nvkm_object *, void *argv, u32 argc, + enum nvkm_object_map *, u64 *addr, u64 *size); +int nvkm_object_unmap(struct nvkm_object *); int nvkm_object_rd08(struct nvkm_object *, u64 addr, u8 *data); int nvkm_object_rd16(struct nvkm_object *, u64 addr, u16 *data); int nvkm_object_rd32(struct nvkm_object *, u64 addr, u32 *data); @@ -66,28 +73,4 @@ bool nvkm_object_insert(struct nvkm_object *); void nvkm_object_remove(struct nvkm_object *); struct nvkm_object *nvkm_object_search(struct nvkm_client *, u64 object, const struct nvkm_object_func *); - -struct nvkm_sclass { - int minver; - int maxver; - s32 oclass; - const struct nvkm_object_func *func; - int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); -}; - -struct nvkm_oclass { - int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); - struct nvkm_sclass base; - const void *priv; - const void *engn; - u32 handle; - u8 route; - u64 token; - u64 object; - struct nvkm_client *client; - struct nvkm_object *parent; - struct nvkm_engine *engine; -}; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h b/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h new file mode 100644 index 000000000000..8e1b945d38f3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h @@ -0,0 +1,31 @@ +#ifndef __NVKM_OCLASS_H__ +#define __NVKM_OCLASS_H__ +#include <core/os.h> +#include <core/debug.h> +struct nvkm_oclass; +struct nvkm_object; + +struct nvkm_sclass { + int minver; + int maxver; + s32 oclass; + const struct nvkm_object_func *func; + int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, + struct nvkm_object **); +}; + +struct nvkm_oclass { + int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, + struct nvkm_object **); + struct nvkm_sclass base; + const void *priv; + const void *engn; + u32 handle; + u8 route; + u64 token; + u64 object; + struct nvkm_client *client; + struct nvkm_object *parent; + struct nvkm_engine *engine; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h index cd57e238ddd3..1f0108fdd24a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h @@ -1,4 +1,23 @@ #ifndef __NVKM_OS_H__ #define __NVKM_OS_H__ #include <nvif/os.h> + +#ifdef __BIG_ENDIAN +#define ioread16_native ioread16be +#define iowrite16_native iowrite16be +#define ioread32_native ioread32be +#define iowrite32_native iowrite32be +#else +#define 
ioread16_native ioread16 +#define iowrite16_native iowrite16 +#define ioread32_native ioread32 +#define iowrite32_native iowrite32 +#endif + +#define iowrite64_native(v,p) do { \ + u32 __iomem *_p = (u32 __iomem *)(p); \ + u64 _v = (v); \ + iowrite32_native(lower_32_bits(_v), &_p[0]); \ + iowrite32_native(upper_32_bits(_v), &_p[1]); \ +} while(0) #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h b/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h index 5ee6298991e2..8a48ca67f60d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h @@ -1,6 +1,7 @@ #ifndef __NVKM_RAMHT_H__ #define __NVKM_RAMHT_H__ #include <core/gpuobj.h> +struct nvkm_object; struct nvkm_ramht_data { struct nvkm_gpuobj *inst; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h index ca9ed3d68f44..a6c21be7537f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h @@ -33,7 +33,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *); /* subdev logging */ #define nvkm_printk_(s,l,p,f,a...) do { \ const struct nvkm_subdev *_subdev = (s); \ - if (_subdev->debug >= (l)) { \ + if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l)) { \ dev_##p(_subdev->device->dev, "%s: "f, \ nvkm_subdev_name[_subdev->index], ##a); \ } \ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h index d2a6532ce3b9..b672a3b07f55 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h @@ -1,6 +1,7 @@ #ifndef __NVKM_DMA_H__ #define __NVKM_DMA_H__ #include <core/engine.h> +#include <core/object.h> struct nvkm_client; struct nvkm_dmaobj { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index e1a854e2ade1..f0024fb5a5af 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -3,6 +3,7 @@ #define nvkm_falcon(p) container_of((p), struct nvkm_falcon, engine) #include <core/engine.h> struct nvkm_fifo_chan; +struct nvkm_gpuobj; enum nvkm_falcon_dmaidx { FALCON_DMAIDX_UCODE = 0, @@ -77,7 +78,7 @@ struct nvkm_falcon_func { void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool); void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8); void (*read_dmem)(struct nvkm_falcon *, u32, u32, u8, void *); - void (*bind_context)(struct nvkm_falcon *, struct nvkm_gpuobj *); + void (*bind_context)(struct nvkm_falcon *, struct nvkm_memory *); int (*wait_for_halt)(struct nvkm_falcon *, u32); int (*clear_interrupt)(struct nvkm_falcon *, u32); void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr); @@ -112,7 +113,7 @@ void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool); void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8); void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *); -void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_gpuobj *); +void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_memory *); void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32); void nvkm_falcon_start(struct nvkm_falcon *); int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index 
f00527b36acc..e42d686fbd8b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -1,6 +1,7 @@ #ifndef __NVKM_FIFO_H__ #define __NVKM_FIFO_H__ #include <core/engine.h> +#include <core/object.h> #include <core/event.h> #define NVKM_FIFO_CHID_NR 4096 @@ -21,7 +22,7 @@ struct nvkm_fifo_chan { u16 chid; struct nvkm_gpuobj *inst; struct nvkm_gpuobj *push; - struct nvkm_vm *vm; + struct nvkm_vmm *vmm; void __iomem *user; u64 addr; u32 size; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h index d3071b5a4f98..ffa963939e15 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h @@ -8,17 +8,22 @@ struct nvkm_bar { struct nvkm_subdev subdev; spinlock_t lock; + bool bar2; /* whether the BAR supports to be ioremapped WC or should be uncached */ bool iomap_uncached; }; +struct nvkm_vmm *nvkm_bar_bar1_vmm(struct nvkm_device *); +void nvkm_bar_bar2_init(struct nvkm_device *); +void nvkm_bar_bar2_fini(struct nvkm_device *); +struct nvkm_vmm *nvkm_bar_bar2_vmm(struct nvkm_device *); void nvkm_bar_flush(struct nvkm_bar *); -struct nvkm_vm *nvkm_bar_kmap(struct nvkm_bar *); -int nvkm_bar_umap(struct nvkm_bar *, u64 size, int type, struct nvkm_vma *); int nv50_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int g84_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gf100_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gk20a_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int gm107_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int gm20b_bar_new(struct nvkm_device *, int, struct nvkm_bar **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 28d513fbf44c..a00fd2e59215 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -1,8 +1,7 @@ #ifndef __NVKM_FB_H__ #define __NVKM_FB_H__ #include <core/subdev.h> - -#include <subdev/mmu.h> +#include <core/mm.h> /* memory type/access flags, do not match hardware values */ #define NV_MEM_ACCESS_RO 1 @@ -21,22 +20,6 @@ #define NVKM_RAM_TYPE_VM 0x7f #define NV_MEM_COMP_VM 0x03 -struct nvkm_mem { - struct drm_device *dev; - - struct nvkm_vma bar_vma; - struct nvkm_vma vma[2]; - u8 page_shift; - - struct nvkm_mm_node *tag; - struct nvkm_mm_node *mem; - dma_addr_t *pages; - u32 memtype; - u64 offset; - u64 size; - struct sg_table *sg; -}; - struct nvkm_fb_tile { struct nvkm_mm_node *tag; u32 addr; @@ -50,6 +33,7 @@ struct nvkm_fb { struct nvkm_subdev subdev; struct nvkm_ram *ram; + struct nvkm_mm tags; struct { struct nvkm_fb_tile region[16]; @@ -62,7 +46,6 @@ struct nvkm_fb { struct nvkm_memory *mmu_wr; }; -bool nvkm_fb_memtype_valid(struct nvkm_fb *, u32 memtype); void nvkm_fb_tile_init(struct nvkm_fb *, int region, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nvkm_fb_tile_fini(struct nvkm_fb *, int region, struct nvkm_fb_tile *); @@ -129,8 +112,11 @@ struct nvkm_ram { u64 size; #define NVKM_RAM_MM_SHIFT 12 +#define NVKM_RAM_MM_ANY (NVKM_MM_HEAP_ANY + 0) +#define NVKM_RAM_MM_NORMAL (NVKM_MM_HEAP_ANY + 1) +#define NVKM_RAM_MM_NOMAP (NVKM_MM_HEAP_ANY + 2) +#define NVKM_RAM_MM_MIXED (NVKM_MM_HEAP_ANY + 3) struct nvkm_mm vram; - struct nvkm_mm tags; u64 stolen; int ranks; @@ -147,6 +133,10 @@ struct nvkm_ram { struct nvkm_ram_data target; }; +int +nvkm_ram_get(struct 
nvkm_device *, u8 heap, u8 type, u8 page, u64 size, + bool contig, bool back, struct nvkm_memory **); + struct nvkm_ram_func { u64 upper; u32 (*probe_fbp)(const struct nvkm_ram_func *, struct nvkm_device *, @@ -157,14 +147,8 @@ struct nvkm_ram_func { void *(*dtor)(struct nvkm_ram *); int (*init)(struct nvkm_ram *); - int (*get)(struct nvkm_ram *, u64 size, u32 align, u32 size_nc, - u32 type, struct nvkm_mem **); - void (*put)(struct nvkm_ram *, struct nvkm_mem **); - int (*calc)(struct nvkm_ram *, u32 freq); int (*prog)(struct nvkm_ram *); void (*tidy)(struct nvkm_ram *); }; - -extern const u8 gf100_pte_storage_type_map[256]; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h index 40f845e31272..8111c0c3c5ec 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h @@ -9,6 +9,7 @@ struct nvkm_instmem { spinlock_t lock; struct list_head list; + struct list_head boot; u32 reserved; struct nvkm_memory *vbios; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h index cd755baf9cab..4a224fd22e48 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h @@ -14,8 +14,7 @@ struct nvkm_ltc { u32 num_tags; u32 tag_base; - struct nvkm_mm tags; - struct nvkm_mm_node *tag_ram; + struct nvkm_memory *tag_ram; int zbc_min; int zbc_max; @@ -23,9 +22,7 @@ struct nvkm_ltc { u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; }; -int nvkm_ltc_tags_alloc(struct nvkm_ltc *, u32 count, struct nvkm_mm_node **); -void nvkm_ltc_tags_free(struct nvkm_ltc *, struct nvkm_mm_node **); -void nvkm_ltc_tags_clear(struct nvkm_ltc *, u32 first, u32 count); +void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count); int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]); int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index dcd3deff27a4..975c42f620a0 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -1,68 +1,130 @@ #ifndef __NVKM_MMU_H__ #define __NVKM_MMU_H__ #include <core/subdev.h> -#include <core/mm.h> -struct nvkm_device; -struct nvkm_mem; - -struct nvkm_vm_pgt { - struct nvkm_memory *mem[2]; - u32 refcount[2]; -}; - -struct nvkm_vm_pgd { - struct list_head head; - struct nvkm_gpuobj *obj; -}; struct nvkm_vma { struct list_head head; - int refcount; - struct nvkm_vm *vm; - struct nvkm_mm_node *node; - u64 offset; - u32 access; + struct rb_node tree; + u64 addr; + u64 size:50; + bool mapref:1; /* PTs (de)referenced on (un)map (vs pre-allocated). */ + bool sparse:1; /* Unmapped PDEs/PTEs will not trigger MMU faults. */ +#define NVKM_VMA_PAGE_NONE 7 + u8 page:3; /* Requested page type (index, or NONE for automatic). */ + u8 refd:3; /* Current page type (index, or NONE for unreferenced). */ + bool used:1; /* Region allocated. */ + bool part:1; /* Region was split from an allocated region by map(). */ + bool user:1; /* Region user-allocated. */ + bool busy:1; /* Region busy (for temporarily preventing user access). */ + struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ + struct nvkm_tags *tags; /* Compression tag reference. 
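[Editor's aside: per the declarations earlier in this patch, this is a refcounted struct nvkm_tags obtained via nvkm_memory_tags_get() and dropped via nvkm_memory_tags_put(); the backing tag heap appears to move from nvkm_ram into nvkm_fb.tags.]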
*/ }; -struct nvkm_vm { +struct nvkm_vmm { + const struct nvkm_vmm_func *func; struct nvkm_mmu *mmu; - + const char *name; + u32 debug; + struct kref kref; struct mutex mutex; - struct nvkm_mm mm; - struct kref refcount; - struct list_head pgd_list; + u64 start; + u64 limit; + + struct nvkm_vmm_pt *pd; + struct list_head join; + + struct list_head list; + struct rb_root free; + struct rb_root root; + + bool bootstrapped; atomic_t engref[NVKM_SUBDEV_NR]; - struct nvkm_vm_pgt *pgt; - u32 fpde; - u32 lpde; + dma_addr_t null; + void *nullp; }; -int nvkm_vm_new(struct nvkm_device *, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *, struct nvkm_vm **); -int nvkm_vm_ref(struct nvkm_vm *, struct nvkm_vm **, struct nvkm_gpuobj *pgd); -int nvkm_vm_boot(struct nvkm_vm *, u64 size); -int nvkm_vm_get(struct nvkm_vm *, u64 size, u32 page_shift, u32 access, - struct nvkm_vma *); -void nvkm_vm_put(struct nvkm_vma *); -void nvkm_vm_map(struct nvkm_vma *, struct nvkm_mem *); -void nvkm_vm_map_at(struct nvkm_vma *, u64 offset, struct nvkm_mem *); -void nvkm_vm_unmap(struct nvkm_vma *); -void nvkm_vm_unmap_at(struct nvkm_vma *, u64 offset, u64 length); +int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *, const char *name, struct nvkm_vmm **); +struct nvkm_vmm *nvkm_vmm_ref(struct nvkm_vmm *); +void nvkm_vmm_unref(struct nvkm_vmm **); +int nvkm_vmm_boot(struct nvkm_vmm *); +int nvkm_vmm_join(struct nvkm_vmm *, struct nvkm_memory *inst); +void nvkm_vmm_part(struct nvkm_vmm *, struct nvkm_memory *inst); +int nvkm_vmm_get(struct nvkm_vmm *, u8 page, u64 size, struct nvkm_vma **); +void nvkm_vmm_put(struct nvkm_vmm *, struct nvkm_vma **); + +struct nvkm_vmm_map { + struct nvkm_memory *memory; + u64 offset; + + struct nvkm_mm_node *mem; + struct scatterlist *sgl; + dma_addr_t *dma; + u64 off; + + const struct nvkm_vmm_page *page; + + struct nvkm_tags *tags; + u64 next; + u64 type; + u64 ctag; +}; + +int nvkm_vmm_map(struct nvkm_vmm *, struct nvkm_vma *, void *argv, u32 argc, + struct nvkm_vmm_map *); +void nvkm_vmm_unmap(struct nvkm_vmm *, struct nvkm_vma *); + +struct nvkm_memory *nvkm_umem_search(struct nvkm_client *, u64); +struct nvkm_vmm *nvkm_uvmm_search(struct nvkm_client *, u64 handle); struct nvkm_mmu { const struct nvkm_mmu_func *func; struct nvkm_subdev subdev; - u64 limit; u8 dma_bits; - u8 lpg_shift; + + int heap_nr; + struct { +#define NVKM_MEM_VRAM 0x01 +#define NVKM_MEM_HOST 0x02 +#define NVKM_MEM_COMP 0x04 +#define NVKM_MEM_DISP 0x08 + u8 type; + u64 size; + } heap[4]; + + int type_nr; + struct { +#define NVKM_MEM_KIND 0x10 +#define NVKM_MEM_MAPPABLE 0x20 +#define NVKM_MEM_COHERENT 0x40 +#define NVKM_MEM_UNCACHED 0x80 + u8 type; + u8 heap; + } type[16]; + + struct nvkm_vmm *vmm; + + struct { + struct mutex mutex; + struct list_head list; + } ptc, ptp; + + struct nvkm_device_oclass user; }; int nv04_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gm200_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gm20b_mmu_new(struct nvkm_device *, int, struct 
nvkm_mmu **); +int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index 1bfd93b85575..9841f076da2e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -97,4 +97,5 @@ int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm200_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gp100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index f98f800cc011..ece650a0c5f9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -34,6 +34,7 @@ #include "nouveau_gem.h" #include "nouveau_chan.h" #include "nouveau_abi16.h" +#include "nouveau_vmm.h" static struct nouveau_abi16 * nouveau_abi16(struct drm_file *file_priv) @@ -134,7 +135,7 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, } if (chan->ntfy) { - nouveau_bo_vma_del(chan->ntfy, &chan->ntfy_vma); + nouveau_vma_del(&chan->ntfy_vma); nouveau_bo_unpin(chan->ntfy); drm_gem_object_unreference_unlocked(&chan->ntfy->gem); } @@ -184,29 +185,34 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = device->info.chipset; break; case NOUVEAU_GETPARAM_PCI_VENDOR: - if (nvxx_device(device)->func->pci) + if (device->info.platform != NV_DEVICE_INFO_V0_SOC) getparam->value = dev->pdev->vendor; else getparam->value = 0; break; case NOUVEAU_GETPARAM_PCI_DEVICE: - if (nvxx_device(device)->func->pci) + if (device->info.platform != NV_DEVICE_INFO_V0_SOC) getparam->value = dev->pdev->device; else getparam->value = 0; break; case NOUVEAU_GETPARAM_BUS_TYPE: - if (!nvxx_device(device)->func->pci) - getparam->value = 3; - else - if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP)) - getparam->value = 0; - else - if (!pci_is_pcie(dev->pdev)) - getparam->value = 1; - else - getparam->value = 2; - break; + switch (device->info.platform) { + case NV_DEVICE_INFO_V0_AGP : getparam->value = 0; break; + case NV_DEVICE_INFO_V0_PCI : getparam->value = 1; break; + case NV_DEVICE_INFO_V0_PCIE: getparam->value = 2; break; + case NV_DEVICE_INFO_V0_SOC : getparam->value = 3; break; + case NV_DEVICE_INFO_V0_IGP : + if (!pci_is_pcie(dev->pdev)) + getparam->value = 1; + else + getparam->value = 2; + break; + default: + WARN_ON(1); + break; + } + break; case NOUVEAU_GETPARAM_FB_SIZE: getparam->value = drm->gem.vram_available; break; @@ -329,8 +334,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) goto done; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(chan->ntfy, cli->vm, - &chan->ntfy_vma); + ret = nouveau_vma_new(chan->ntfy, &cli->vmm, &chan->ntfy_vma); if (ret) goto done; } @@ -340,7 +344,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nvkm_mm_init(&chan->heap, 0, PAGE_SIZE, 1); + ret = nvkm_mm_init(&chan->heap, 0, 0, PAGE_SIZE, 1); done: if (ret) nouveau_abi16_chan_fini(abi16, chan); @@ -548,8 +552,8 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS) if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; - args.start +=
chan->ntfy_vma.offset; - args.limit += chan->ntfy_vma.offset; + args.start += chan->ntfy_vma->addr; + args.limit += chan->ntfy_vma->addr; } else if (drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 841cc556fad8..327747680324 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -23,7 +23,7 @@ struct nouveau_abi16_chan { struct nouveau_channel *chan; struct list_head notifiers; struct nouveau_bo *ntfy; - struct nvkm_vma ntfy_vma; + struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index dd6fba55ad5d..66bf2aff4a3e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -1478,9 +1478,13 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb, case 1: entry->dpconf.link_bw = 270000; break; - default: + case 2: entry->dpconf.link_bw = 540000; break; + case 3: + default: + entry->dpconf.link_bw = 810000; + break; } switch ((conf & 0x0f000000) >> 24) { case 0xf: @@ -1964,7 +1968,7 @@ static int load_nv17_hw_sequencer_ucode(struct drm_device *dev, * The microcode entries are found by the "HWSQ" signature. */ - const uint8_t hwsq_signature[] = { 'H', 'W', 'S', 'Q' }; + static const uint8_t hwsq_signature[] = { 'H', 'W', 'S', 'Q' }; const int sz = sizeof(hwsq_signature); int hwsq_offset; @@ -1980,7 +1984,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); struct nvbios *bios = &drm->vbios; - const uint8_t edid_sig[] = { + static const uint8_t edid_sig[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; uint16_t offset = 0; uint16_t newoffset; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index e427f80344c4..2615912430cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -37,6 +37,12 @@ #include "nouveau_bo.h" #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_vmm.h" + +#include <nvif/class.h> +#include <nvif/if500b.h> +#include <nvif/if900b.h> /* * NV10-NV40 tiling helpers @@ -48,8 +54,7 @@ nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg, { struct nouveau_drm *drm = nouveau_drm(dev); int i = reg - drm->tile.reg; - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_fb *fb = device->fb; + struct nvkm_fb *fb = nvxx_fb(&drm->client.device); struct nvkm_fb_tile *tile = &fb->tile.region[i]; nouveau_fence_unref(®->fence); @@ -97,7 +102,7 @@ nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile, static struct nouveau_drm_tile * nv10_bo_set_tiling(struct drm_device *dev, u32 addr, - u32 size, u32 pitch, u32 flags) + u32 size, u32 pitch, u32 zeta) { struct nouveau_drm *drm = nouveau_drm(dev); struct nvkm_fb *fb = nvxx_fb(&drm->client.device); @@ -120,8 +125,7 @@ nv10_bo_set_tiling(struct drm_device *dev, u32 addr, } if (found) - nv10_bo_update_tile_region(dev, found, addr, size, - pitch, flags); + nv10_bo_update_tile_region(dev, found, addr, size, pitch, zeta); return found; } @@ -155,27 +159,27 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, struct nvif_device *device = &drm->client.device; if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { - if (nvbo->tile_mode) { + if (nvbo->mode) { if (device->info.chipset >= 0x40) { 
*align = 65536; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x30) { *align = 32768; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x20) { *align = 16384; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x10) { *align = 16384; - *size = roundup_64(*size, 32 * nvbo->tile_mode); + *size = roundup_64(*size, 32 * nvbo->mode); } } } else { - *size = roundup_64(*size, (1 << nvbo->page_shift)); - *align = max((1 << nvbo->page_shift), *align); + *size = roundup_64(*size, (1 << nvbo->page)); + *align = max((1 << nvbo->page), *align); } *size = roundup_64(*size, PAGE_SIZE); @@ -187,11 +191,13 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, struct sg_table *sg, struct reservation_object *robj, struct nouveau_bo **pnvbo) { - struct nouveau_drm *drm = nouveau_drm(cli->dev); + struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; + struct nvif_mmu *mmu = &cli->mmu; + struct nvif_vmm *vmm = &cli->vmm.vmm; size_t acc_size; - int ret; int type = ttm_bo_type_device; + int ret, i, pi = -1; if (!size) { NV_WARN(drm, "skipped size %016llx\n", size); @@ -207,19 +213,80 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, INIT_LIST_HEAD(&nvbo->head); INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); - nvbo->tile_mode = tile_mode; - nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &drm->ttm.bdev; nvbo->cli = cli; - if (!nvxx_device(&drm->client.device)->func->cpu_coherent) - nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; + /* This is confusing, and doesn't actually mean we want an uncached + * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated + * into in nouveau_gem_new(). + */ + if (flags & TTM_PL_FLAG_UNCACHED) { + /* Determine if we can get a cache-coherent map, forcing + * uncached mapping if we can't. + */ + if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + nvbo->force_coherent = true; + } + + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { + nvbo->kind = (tile_flags & 0x0000ff00) >> 8; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return -EINVAL; + } + + nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind; + } else + if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + nvbo->kind = (tile_flags & 0x00007f00) >> 8; + nvbo->comp = (tile_flags & 0x00030000) >> 16; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return -EINVAL; + } + } else { + nvbo->zeta = (tile_flags & 0x00000007); + } + nvbo->mode = tile_mode; + nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG); + + /* Determine the desirable target GPU page size for the buffer. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Because we cannot currently allow VMM maps to fail + * during buffer migration, we need to determine page + * size for the buffer up-front, and pre-allocate its + * page tables. + * + * Skip page sizes that can't support needed domains. + */ + if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && + (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) + continue; + if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + continue; + + /* Select this page size if it's the first that supports + * the potential memory domains, or when it's compatible + * with the requested compression settings. 
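[Editor's aside: vmm->page[i].comp marks page sizes that support compression; when no compressible size fits, the code just below clears nvbo->comp and, on GF100-class MMUs, demotes the kind through the mmu->kind[] table.]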
+ */ + if (pi < 0 || !nvbo->comp || vmm->page[i].comp) + pi = i; + + /* Stop once the buffer is larger than the current page size. */ + if (size >= 1ULL << vmm->page[i].shift) + break; + } + + if (WARN_ON(pi < 0)) { + kfree(nvbo); + return -EINVAL; + } - nvbo->page_shift = 12; - if (drm->client.vm) { - if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024) - nvbo->page_shift = drm->client.vm->mmu->lpg_shift; + /* Disable compression if suitable settings couldn't be found. */ + if (nvbo->comp && !vmm->page[pi].comp) { + if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100) + nvbo->kind = mmu->kind[nvbo->kind]; + nvbo->comp = 0; } + nvbo->page = vmm->page[pi].shift; nouveau_bo_fixup_align(nvbo, flags, &align, &size); nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; @@ -262,7 +329,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) unsigned i, fpfn, lpfn; if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS && - nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) && + nvbo->mode && (type & TTM_PL_FLAG_VRAM) && nvbo->bo.mem.num_pages < vram_pages / 4) { /* * Make sure that the color and depth buffers are handled @@ -270,7 +337,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) * speed up when alpha-blending and depth-test are enabled * at the same time. */ - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) { + if (nvbo->zeta) { fpfn = vram_pages / 2; lpfn = ~0; } else { @@ -321,14 +388,10 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig) if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && memtype == TTM_PL_FLAG_VRAM && contig) { - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) { - if (bo->mem.mem_type == TTM_PL_VRAM) { - struct nvkm_mem *mem = bo->mem.mm_node; - if (!nvkm_mm_contiguous(mem->mem)) - evict = true; - } - nvbo->tile_flags &= ~NOUVEAU_GEM_TILE_NONCONTIG; + if (!nvbo->contig) { + nvbo->contig = true; force = true; + evict = true; } } @@ -376,7 +439,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig) out: if (force && ret) - nvbo->tile_flags |= NOUVEAU_GEM_TILE_NONCONTIG; + nvbo->contig = false; ttm_bo_unreserve(bo); return ret; } @@ -446,7 +509,6 @@ void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_device *device = nvxx_device(&drm->client.device); struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i; @@ -458,7 +520,8 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) return; for (i = 0; i < ttm_dma->ttm.num_pages; i++) - dma_sync_single_for_device(device->dev, ttm_dma->dma_address[i], + dma_sync_single_for_device(drm->dev->dev, + ttm_dma->dma_address[i], PAGE_SIZE, DMA_TO_DEVICE); } @@ -466,7 +529,6 @@ void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_device *device = nvxx_device(&drm->client.device); struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i; @@ -478,7 +540,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) return; for (i = 0; i < ttm_dma->ttm.num_pages; i++) - dma_sync_single_for_cpu(device->dev, ttm_dma->dma_address[i], + dma_sync_single_for_cpu(drm->dev->dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_FROM_DEVICE); } @@ -568,6 +630,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, struct ttm_mem_type_manager *man) { struct nouveau_drm *drm = nouveau_bdev(bdev); + struct nvif_mmu *mmu = &drm->client.mmu; switch (type) { case TTM_PL_SYSTEM: @@ -584,7 +647,8 @@ nouveau_bo_init_mem_type(struct
ttm_bo_device *bdev, uint32_t type, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { /* Some BARs do not support being ioremapped WC */ - if (nvxx_bar(&drm->client.device)->iomap_uncached) { + const u8 type = mmu->type[drm->ttm.type_vram].type; + if (type & NVIF_MEM_UNCACHED) { man->available_caching = TTM_PL_FLAG_UNCACHED; man->default_caching = TTM_PL_FLAG_UNCACHED; } @@ -659,14 +723,14 @@ static int nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 10); if (ret == 0) { BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, PAGE_SIZE); @@ -691,9 +755,9 @@ static int nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -729,9 +793,9 @@ static int nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -768,9 +832,9 @@ static int nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -806,14 +870,14 @@ static int nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 7); if (ret == 0) { BEGIN_NV04(chan, NvSubCopy, 0x0320, 6); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, 0x00000000 /* COPY */); OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT); } @@ -824,15 +888,15 @@ static int nv84_bo_move_exec(struct 
nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 7); if (ret == 0) { BEGIN_NV04(chan, NvSubCopy, 0x0304, 6); OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */); } return ret; @@ -858,12 +922,12 @@ static int nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); u64 length = (new_reg->num_pages << PAGE_SHIFT); - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; - int src_tiled = !!mem->memtype; - int dst_tiled = !!((struct nvkm_mem *)new_reg->mm_node)->memtype; + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; + int src_tiled = !!mem->kind; + int dst_tiled = !!nouveau_mem(new_reg)->kind; int ret; while (length) { @@ -1000,25 +1064,31 @@ static int nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo, struct ttm_mem_reg *reg) { - struct nvkm_mem *old_mem = bo->mem.mm_node; - struct nvkm_mem *new_mem = reg->mm_node; - u64 size = (u64)reg->num_pages << PAGE_SHIFT; + struct nouveau_mem *old_mem = nouveau_mem(&bo->mem); + struct nouveau_mem *new_mem = nouveau_mem(reg); + struct nvif_vmm *vmm = &drm->client.vmm.vmm; int ret; - ret = nvkm_vm_get(drm->client.vm, size, old_mem->page_shift, - NV_MEM_ACCESS_RW, &old_mem->vma[0]); + ret = nvif_vmm_get(vmm, LAZY, false, old_mem->mem.page, 0, + old_mem->mem.size, &old_mem->vma[0]); if (ret) return ret; - ret = nvkm_vm_get(drm->client.vm, size, new_mem->page_shift, - NV_MEM_ACCESS_RW, &old_mem->vma[1]); + ret = nvif_vmm_get(vmm, LAZY, false, new_mem->mem.page, 0, + new_mem->mem.size, &old_mem->vma[1]); + if (ret) + goto done; + + ret = nouveau_mem_map(old_mem, vmm, &old_mem->vma[0]); + if (ret) + goto done; + + ret = nouveau_mem_map(new_mem, vmm, &old_mem->vma[1]); +done: if (ret) { - nvkm_vm_put(&old_mem->vma[0]); - return ret; + nvif_vmm_put(vmm, &old_mem->vma[1]); + nvif_vmm_put(vmm, &old_mem->vma[0]); } - - nvkm_vm_map(&old_mem->vma[0], old_mem); - nvkm_vm_map(&old_mem->vma[1], new_mem); return 0; } @@ -1200,21 +1270,23 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_reg) { + struct nouveau_mem *mem = new_reg ? nouveau_mem(new_reg) : NULL; struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_vma *vma; + struct nouveau_vma *vma; /* ttm can now (stupidly) pass the driver bos it didn't create... 
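nouveau_bo_move_prep above now has to acquire and map two VMAs through the new nvif interfaces, and unwinds both through a single done: label so callers never see a half-mapped pair. The same flow restated without the diff markers, as a sketch; map_pair is an illustrative name, and the nvif_vmm_get/put, nouveau_mem_map calls and the LAZY paging mode are the ones used in the hunk:

```c
static int map_pair(struct nvif_vmm *vmm, struct nouveau_mem *old_mem,
		    struct nouveau_mem *new_mem)
{
	int ret;

	ret = nvif_vmm_get(vmm, LAZY, false, old_mem->mem.page, 0,
			   old_mem->mem.size, &old_mem->vma[0]);
	if (ret)
		return ret;	/* nothing to undo yet */

	ret = nvif_vmm_get(vmm, LAZY, false, new_mem->mem.page, 0,
			   new_mem->mem.size, &old_mem->vma[1]);
	if (ret)
		goto done;

	ret = nouveau_mem_map(old_mem, vmm, &old_mem->vma[0]);
	if (ret)
		goto done;

	ret = nouveau_mem_map(new_mem, vmm, &old_mem->vma[1]);
done:
	if (ret) {	/* release both; putting an unused vma is harmless */
		nvif_vmm_put(vmm, &old_mem->vma[1]);
		nvif_vmm_put(vmm, &old_mem->vma[0]);
	}
	return ret;
}
```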
*/ if (bo->destroy != nouveau_bo_del_ttm) return; - list_for_each_entry(vma, &nvbo->vma_list, head) { - if (new_reg && new_reg->mem_type != TTM_PL_SYSTEM && - (new_reg->mem_type == TTM_PL_VRAM || - nvbo->page_shift != vma->vm->mmu->lpg_shift)) { - nvkm_vm_map(vma, new_reg->mm_node); - } else { + if (mem && new_reg->mem_type != TTM_PL_SYSTEM && + mem->mem.page == nvbo->page) { + list_for_each_entry(vma, &nvbo->vma_list, head) { + nouveau_vma_map(vma, mem); + } + } else { + list_for_each_entry(vma, &nvbo->vma_list, head) { WARN_ON(ttm_bo_wait(bo, false, false)); - nvkm_vm_unmap(vma); + nouveau_vma_unmap(vma); } } } @@ -1234,8 +1306,7 @@ nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) { *new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size, - nvbo->tile_mode, - nvbo->tile_flags); + nvbo->mode, nvbo->zeta); } return 0; @@ -1331,8 +1402,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type]; struct nouveau_drm *drm = nouveau_bdev(bdev); struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_mem *mem = reg->mm_node; - int ret; + struct nouveau_mem *mem = nouveau_mem(reg); reg->bus.addr = NULL; reg->bus.offset = 0; @@ -1353,7 +1423,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) reg->bus.is_iomem = !drm->agp.cma; } #endif - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || !mem->memtype) + if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 || !mem->kind) /* untiled */ break; /* fallthrough, tiled memory */ @@ -1361,19 +1431,40 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) reg->bus.offset = reg->start << PAGE_SHIFT; reg->bus.base = device->func->resource_addr(device, 1); reg->bus.is_iomem = true; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - struct nvkm_bar *bar = nvxx_bar(&drm->client.device); - int page_shift = 12; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI) - page_shift = mem->page_shift; + if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) { + union { + struct nv50_mem_map_v0 nv50; + struct gf100_mem_map_v0 gf100; + } args; + u64 handle, length; + u32 argc = 0; + int ret; + + switch (mem->mem.object.oclass) { + case NVIF_CLASS_MEM_NV50: + args.nv50.version = 0; + args.nv50.ro = 0; + args.nv50.kind = mem->kind; + args.nv50.comp = mem->comp; + break; + case NVIF_CLASS_MEM_GF100: + args.gf100.version = 0; + args.gf100.ro = 0; + args.gf100.kind = mem->kind; + break; + default: + WARN_ON(1); + break; + } - ret = nvkm_bar_umap(bar, mem->size << 12, page_shift, - &mem->bar_vma); - if (ret) - return ret; + ret = nvif_object_map_handle(&mem->mem.object, + &argc, argc, + &handle, &length); + if (ret != 1) + return ret ? 
ret : -EINVAL; - nvkm_vm_map(&mem->bar_vma, mem); - reg->bus.offset = mem->bar_vma.offset; + reg->bus.base = 0; + reg->bus.offset = handle; } break; default: @@ -1385,13 +1476,22 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) static void nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) { - struct nvkm_mem *mem = reg->mm_node; - - if (!mem->bar_vma.node) - return; + struct nouveau_drm *drm = nouveau_bdev(bdev); + struct nouveau_mem *mem = nouveau_mem(reg); - nvkm_vm_unmap(&mem->bar_vma); - nvkm_vm_put(&mem->bar_vma); + if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) { + switch (reg->mem_type) { + case TTM_PL_TT: + if (mem->kind) + nvif_object_unmap_handle(&mem->mem.object); + break; + case TTM_PL_VRAM: + nvif_object_unmap_handle(&mem->mem.object); + break; + default: + break; + } + } } static int @@ -1408,7 +1508,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) */ if (bo->mem.mem_type != TTM_PL_VRAM) { if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || - !nouveau_bo_tile_layout(nvbo)) + !nvbo->kind) return 0; if (bo->mem.mem_type == TTM_PL_SYSTEM) { @@ -1445,9 +1545,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) { struct ttm_dma_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; - struct nvkm_device *device; - struct drm_device *dev; - struct device *pdev; + struct device *dev; unsigned i; int r; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); @@ -1464,9 +1562,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) } drm = nouveau_bdev(ttm->bdev); - device = nvxx_device(&drm->client.device); - dev = drm->dev; - pdev = device->dev; + dev = drm->dev->dev; #if IS_ENABLED(CONFIG_AGP) if (drm->agp.bridge) { @@ -1476,7 +1572,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86) if (swiotlb_nr_tbl()) { - return ttm_dma_populate((void *)ttm, dev->dev); + return ttm_dma_populate((void *)ttm, dev); } #endif @@ -1488,12 +1584,12 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) for (i = 0; i < ttm->num_pages; i++) { dma_addr_t addr; - addr = dma_map_page(pdev, ttm->pages[i], 0, PAGE_SIZE, + addr = dma_map_page(dev, ttm->pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(pdev, addr)) { + if (dma_mapping_error(dev, addr)) { while (i--) { - dma_unmap_page(pdev, ttm_dma->dma_address[i], + dma_unmap_page(dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_BIDIRECTIONAL); ttm_dma->dma_address[i] = 0; } @@ -1511,9 +1607,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) { struct ttm_dma_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; - struct nvkm_device *device; - struct drm_device *dev; - struct device *pdev; + struct device *dev; unsigned i; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); @@ -1521,9 +1615,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) return; drm = nouveau_bdev(ttm->bdev); - device = nvxx_device(&drm->client.device); - dev = drm->dev; - pdev = device->dev; + dev = drm->dev->dev; #if IS_ENABLED(CONFIG_AGP) if (drm->agp.bridge) { @@ -1534,14 +1626,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86) if (swiotlb_nr_tbl()) { - ttm_dma_unpopulate((void *)ttm, dev->dev); + ttm_dma_unpopulate((void *)ttm, dev); return; } #endif for (i = 0; i < ttm->num_pages; i++) { if (ttm_dma->dma_address[i]) { - dma_unmap_page(pdev, ttm_dma->dma_address[i], PAGE_SIZE, + dma_unmap_page(dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_BIDIRECTIONAL); } } @@ -1576,48 +1668,3 @@ struct 
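The populate path above keeps its rollback on dma_map_page() failure while switching to drm->dev->dev as the mapping device: if page i fails to map, pages 0..i-1 are unmapped again before the error is returned. The loop as a standalone kernel-style sketch; map_pages is a hypothetical helper name and the parameters mirror the ttm_dma_tt fields used in the hunk:

```c
#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int map_pages(struct device *dev, struct page **pages,
		     dma_addr_t *dma_address, unsigned long num_pages)
{
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		dma_addr_t addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE,
					       DMA_BIDIRECTIONAL);

		if (dma_mapping_error(dev, addr)) {
			while (i--) {	/* undo what already succeeded */
				dma_unmap_page(dev, dma_address[i], PAGE_SIZE,
					       DMA_BIDIRECTIONAL);
				dma_address[i] = 0;
			}
			return -EFAULT;
		}
		dma_address[i] = addr;
	}
	return 0;
}
```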
ttm_bo_driver nouveau_bo_driver = { .io_mem_free = &nouveau_ttm_io_mem_free, .io_mem_pfn = ttm_bo_default_io_mem_pfn, }; - -struct nvkm_vma * -nouveau_bo_vma_find(struct nouveau_bo *nvbo, struct nvkm_vm *vm) -{ - struct nvkm_vma *vma; - list_for_each_entry(vma, &nvbo->vma_list, head) { - if (vma->vm == vm) - return vma; - } - - return NULL; -} - -int -nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nvkm_vm *vm, - struct nvkm_vma *vma) -{ - const u32 size = nvbo->bo.mem.num_pages << PAGE_SHIFT; - int ret; - - ret = nvkm_vm_get(vm, size, nvbo->page_shift, - NV_MEM_ACCESS_RW, vma); - if (ret) - return ret; - - if ( nvbo->bo.mem.mem_type != TTM_PL_SYSTEM && - (nvbo->bo.mem.mem_type == TTM_PL_VRAM || - nvbo->page_shift != vma->vm->mmu->lpg_shift)) - nvkm_vm_map(vma, nvbo->bo.mem.mm_node); - - list_add_tail(&vma->head, &nvbo->vma_list); - vma->refcount = 1; - return 0; -} - -void -nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nvkm_vma *vma) -{ - if (vma->node) { - if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - list_del(&vma->head); - } -} diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index b06a5385d6dd..23002bdd94a8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -24,12 +24,16 @@ struct nouveau_bo { bool validate_mapped; struct list_head vma_list; - unsigned page_shift; struct nouveau_cli *cli; - u32 tile_mode; - u32 tile_flags; + unsigned contig:1; + unsigned page:5; + unsigned kind:8; + unsigned comp:3; + unsigned zeta:3; + unsigned mode; + struct nouveau_drm_tile *tile; /* Only valid if allocated via nouveau_gem_new() and iff you hold a @@ -89,13 +93,6 @@ int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo); void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo); -struct nvkm_vma * -nouveau_bo_vma_find(struct nouveau_bo *, struct nvkm_vm *); - -int nouveau_bo_vma_add(struct nouveau_bo *, struct nvkm_vm *, - struct nvkm_vma *); -void nouveau_bo_vma_del(struct nouveau_bo *, struct nvkm_vma *); - /* TODO: submit equivalent to TTM generic API upstream? 
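The nouveau_bo.h hunk above repacks the decoded tiling state into bitfields beside the retained full-width mode word. A compilable sketch of that layout; tiling_state is an illustrative name and the assert merely documents the packing, it is not part of the patch:

```c
#include <stdint.h>

struct tiling_state {
	unsigned contig:1;	/* allocation must be contiguous    */
	unsigned page:5;	/* GPU page shift                   */
	unsigned kind:8;	/* PTE kind (memory layout)         */
	unsigned comp:3;	/* compression tag selection        */
	unsigned zeta:3;	/* pre-NV50 depth-buffer hint       */
	unsigned mode;		/* raw tile mode, still a full word */
};

_Static_assert(sizeof(struct tiling_state) <= 2 * sizeof(unsigned),
	       "20 bits of decoded state pack beside the mode word");
```

Packing the decoded state this way keeps the per-BO footprint at the same two words the old tile_mode/tile_flags pair occupied, while making each consumer read the field it actually means.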
*/ static inline void __iomem * nvbo_kmap_obj_iovirtual(struct nouveau_bo *nvbo) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index dbc41fa86ee8..af1116655910 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -40,6 +40,7 @@ #include "nouveau_chan.h" #include "nouveau_fence.h" #include "nouveau_abi16.h" +#include "nouveau_vmm.h" MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; @@ -83,6 +84,14 @@ nouveau_channel_del(struct nouveau_channel **pchan) { struct nouveau_channel *chan = *pchan; if (chan) { + struct nouveau_cli *cli = (void *)chan->user.client; + bool super; + + if (cli) { + super = cli->base.super; + cli->base.super = true; + } + if (chan->fence) nouveau_fence(chan->drm)->context_del(chan); nvif_object_fini(&chan->nvsw); @@ -91,12 +100,15 @@ nouveau_channel_del(struct nouveau_channel **pchan) nvif_notify_fini(&chan->kill); nvif_object_fini(&chan->user); nvif_object_fini(&chan->push.ctxdma); - nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma); + nouveau_vma_del(&chan->push.vma); nouveau_bo_unmap(chan->push.buffer); if (chan->push.buffer && chan->push.buffer->pin_refcnt) nouveau_bo_unpin(chan->push.buffer); nouveau_bo_ref(NULL, &chan->push.buffer); kfree(chan); + + if (cli) + cli->base.super = super; } *pchan = NULL; } @@ -106,7 +118,6 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, u32 size, struct nouveau_channel **pchan) { struct nouveau_cli *cli = (void *)device->object.client; - struct nvkm_mmu *mmu = nvxx_mmu(device); struct nv_dma_v0 args = {}; struct nouveau_channel *chan; u32 target; @@ -142,11 +153,11 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, * pushbuf lives in, this is because the GEM code requires that * we be able to call out to other (indirect) push buffers */ - chan->push.vma.offset = chan->push.buffer->bo.offset; + chan->push.addr = chan->push.buffer->bo.offset; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(chan->push.buffer, cli->vm, - &chan->push.vma); + ret = nouveau_vma_new(chan->push.buffer, &cli->vmm, + &chan->push.vma); if (ret) { nouveau_channel_del(pchan); return ret; @@ -155,7 +166,9 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; + + chan->push.addr = chan->push.vma->addr; } else if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) { if (device->info.family == NV_DEVICE_INFO_V0_TNT) { @@ -185,7 +198,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } } @@ -203,6 +216,7 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u32 engine, struct nouveau_channel **pchan) { + struct nouveau_cli *cli = (void *)device->object.client; static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A, MAXWELL_CHANNEL_GPFIFO_A, KEPLER_CHANNEL_GPFIFO_B, @@ -233,22 +247,22 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.kepler.version = 0; args.kepler.engines = engine; args.kepler.ilength = 0x02000; - args.kepler.ioffset = 0x10000 + chan->push.vma.offset; - args.kepler.vm = 0; + args.kepler.ioffset 
= 0x10000 + chan->push.addr; + args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.kepler); } else if (oclass[0] >= FERMI_CHANNEL_GPFIFO) { args.fermi.version = 0; args.fermi.ilength = 0x02000; - args.fermi.ioffset = 0x10000 + chan->push.vma.offset; - args.fermi.vm = 0; + args.fermi.ioffset = 0x10000 + chan->push.addr; + args.fermi.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.fermi); } else { args.nv50.version = 0; args.nv50.ilength = 0x02000; - args.nv50.ioffset = 0x10000 + chan->push.vma.offset; + args.nv50.ioffset = 0x10000 + chan->push.addr; args.nv50.pushbuf = nvif_handle(&chan->push.ctxdma); - args.nv50.vm = 0; + args.nv50.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.nv50); } @@ -293,7 +307,7 @@ nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device, /* create channel object */ args.version = 0; args.pushbuf = nvif_handle(&chan->push.ctxdma); - args.offset = chan->push.vma.offset; + args.offset = chan->push.addr; do { ret = nvif_object_init(&device->object, 0, *oclass++, @@ -314,11 +328,10 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) struct nvif_device *device = chan->device; struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_drm *drm = chan->drm; - struct nvkm_mmu *mmu = nvxx_mmu(device); struct nv_dma_v0 args = {}; int ret, i; - nvif_object_map(&chan->user); + nvif_object_map(&chan->user, NULL, 0); if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) { ret = nvif_notify_init(&chan->user, nouveau_channel_killed, @@ -339,7 +352,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } else { args.target = NV_DMA_V0_TARGET_VRAM; args.access = NV_DMA_V0_ACCESS_RDWR; @@ -356,7 +369,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } else if (chan->drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; @@ -368,7 +381,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } ret = nvif_object_init(&chan->user, gart, NV_DMA_IN_MEMORY, diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 46b947ba1cf4..f29d3a72c48c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -16,8 +16,9 @@ struct nouveau_channel { struct { struct nouveau_bo *buffer; - struct nvkm_vma vma; + struct nouveau_vma *vma; struct nvif_object ctxdma; + u64 addr; } push; /* TODO: this will be reworked in the near future */ diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 201aec2ea5b8..1411bf05b89d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -1,14 +1,11 @@ #ifndef __NOUVEAU_DISPLAY_H__ #define __NOUVEAU_DISPLAY_H__ - -#include <subdev/mmu.h> - #include "nouveau_drv.h" struct nouveau_framebuffer { struct drm_framebuffer base; struct nouveau_bo *nvbo; - struct nvkm_vma vma; + struct nouveau_vma *vma; u32 r_handle; u32 r_format; u32 r_pitch; diff --git 
a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 2634a1a79888..10e84f6ca2b7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -26,6 +26,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" +#include "nouveau_vmm.h" void OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords) @@ -71,11 +72,11 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) return -EBUSY; } - if (val < chan->push.vma.offset || - val > chan->push.vma.offset + (chan->dma.max << 2)) + if (val < chan->push.addr || + val > chan->push.addr + (chan->dma.max << 2)) return -EINVAL; - return (val - chan->push.vma.offset) >> 2; + return (val - chan->push.addr) >> 2; } void @@ -84,13 +85,13 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, { struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_bo *pb = chan->push.buffer; - struct nvkm_vma *vma; + struct nouveau_vma *vma; int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; u64 offset; - vma = nouveau_bo_vma_find(bo, cli->vm); + vma = nouveau_vma_find(bo, &cli->vmm); BUG_ON(!vma); - offset = vma->offset + delta; + offset = vma->addr + delta; BUG_ON(chan->dma.ib_free < 1); @@ -224,7 +225,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size) * instruct the GPU to jump back to the start right * after processing the currently pending commands. */ - OUT_RING(chan, chan->push.vma.offset | 0x20000000); + OUT_RING(chan, chan->push.addr | 0x20000000); /* wait for GET to depart from the skips area. * prevents writing GET==PUT and causing a race diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index aff3a9d0a1fc..74e10b14a7da 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -140,7 +140,7 @@ BEGIN_IMC0(struct nouveau_channel *chan, int subc, int mthd, u16 data) #define WRITE_PUT(val) do { \ mb(); \ nouveau_bo_rd32(chan->push.buffer, 0); \ - nvif_wr32(&chan->user, chan->user_put, ((val) << 2) + chan->push.vma.offset); \ + nvif_wr32(&chan->user, chan->user_put, ((val) << 2) + chan->push.addr);\ } while (0) static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 595630d1fb9e..8d4a5be3b913 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -111,33 +111,119 @@ nouveau_name(struct drm_device *dev) return nouveau_platform_name(to_platform_device(dev->dev)); } +static inline bool +nouveau_cli_work_ready(struct dma_fence *fence, bool wait) +{ + if (!dma_fence_is_signaled(fence)) { + if (!wait) + return false; + WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0); + } + dma_fence_put(fence); + return true; +} + +static void +nouveau_cli_work_flush(struct nouveau_cli *cli, bool wait) +{ + struct nouveau_cli_work *work, *wtmp; + mutex_lock(&cli->lock); + list_for_each_entry_safe(work, wtmp, &cli->worker, head) { + if (!work->fence || nouveau_cli_work_ready(work->fence, wait)) { + list_del(&work->head); + work->func(work); + } + } + mutex_unlock(&cli->lock); +} + +static void +nouveau_cli_work_fence(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct nouveau_cli_work *work = container_of(cb, typeof(*work), cb); + schedule_work(&work->cli->work); +} + +void +nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, + struct nouveau_cli_work *work) +{ + work->fence = dma_fence_get(fence); + work->cli 
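READ_GET above now validates against chan->push.addr instead of the per-VM offset, but the arithmetic is unchanged: a raw GET pointer is range-checked against the push buffer window and converted to a dword index. A standalone sketch, with push_addr and dma_max standing in for the channel fields and -1 standing in for -EINVAL:

```c
#include <stdint.h>

static int get_to_index(uint64_t val, uint64_t push_addr, int dma_max)
{
	if (val < push_addr || val > push_addr + ((uint64_t)dma_max << 2))
		return -1;			/* outside the pushbuf */
	return (int)((val - push_addr) >> 2);	/* byte offset -> dword */
}
```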
= cli; + mutex_lock(&cli->lock); + list_add_tail(&work->head, &cli->worker); + mutex_unlock(&cli->lock); + if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) + nouveau_cli_work_fence(fence, &work->cb); +} + +static void +nouveau_cli_work(struct work_struct *w) +{ + struct nouveau_cli *cli = container_of(w, typeof(*cli), work); + nouveau_cli_work_flush(cli, false); +} + static void nouveau_cli_fini(struct nouveau_cli *cli) { - nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL); + nouveau_cli_work_flush(cli, true); usif_client_fini(cli); + nouveau_vmm_fini(&cli->vmm); + nvif_mmu_fini(&cli->mmu); nvif_device_fini(&cli->device); + mutex_lock(&cli->drm->master.lock); nvif_client_fini(&cli->base); + mutex_unlock(&cli->drm->master.lock); } static int nouveau_cli_init(struct nouveau_drm *drm, const char *sname, struct nouveau_cli *cli) { + static const struct nvif_mclass + mems[] = { + { NVIF_CLASS_MEM_GF100, -1 }, + { NVIF_CLASS_MEM_NV50 , -1 }, + { NVIF_CLASS_MEM_NV04 , -1 }, + {} + }; + static const struct nvif_mclass + mmus[] = { + { NVIF_CLASS_MMU_GF100, -1 }, + { NVIF_CLASS_MMU_NV50 , -1 }, + { NVIF_CLASS_MMU_NV04 , -1 }, + {} + }; + static const struct nvif_mclass + vmms[] = { + { NVIF_CLASS_VMM_GP100, -1 }, + { NVIF_CLASS_VMM_GM200, -1 }, + { NVIF_CLASS_VMM_GF100, -1 }, + { NVIF_CLASS_VMM_NV50 , -1 }, + { NVIF_CLASS_VMM_NV04 , -1 }, + {} + }; u64 device = nouveau_name(drm->dev); int ret; snprintf(cli->name, sizeof(cli->name), "%s", sname); - cli->dev = drm->dev; + cli->drm = drm; mutex_init(&cli->mutex); usif_client_init(cli); - if (cli == &drm->client) { + INIT_WORK(&cli->work, nouveau_cli_work); + INIT_LIST_HEAD(&cli->worker); + mutex_init(&cli->lock); + + if (cli == &drm->master) { ret = nvif_driver_init(NULL, nouveau_config, nouveau_debug, cli->name, device, &cli->base); } else { - ret = nvif_client_init(&drm->client.base, cli->name, device, + mutex_lock(&drm->master.lock); + ret = nvif_client_init(&drm->master.base, cli->name, device, &cli->base); + mutex_unlock(&drm->master.lock); } if (ret) { NV_ERROR(drm, "Client allocation failed: %d\n", ret); @@ -154,6 +240,38 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, goto done; } + ret = nvif_mclass(&cli->device.object, mmus); + if (ret < 0) { + NV_ERROR(drm, "No supported MMU class\n"); + goto done; + } + + ret = nvif_mmu_init(&cli->device.object, mmus[ret].oclass, &cli->mmu); + if (ret) { + NV_ERROR(drm, "MMU allocation failed: %d\n", ret); + goto done; + } + + ret = nvif_mclass(&cli->mmu.object, vmms); + if (ret < 0) { + NV_ERROR(drm, "No supported VMM class\n"); + goto done; + } + + ret = nouveau_vmm_init(cli, vmms[ret].oclass, &cli->vmm); + if (ret) { + NV_ERROR(drm, "VMM allocation failed: %d\n", ret); + goto done; + } + + ret = nvif_mclass(&cli->mmu.object, mems); + if (ret < 0) { + NV_ERROR(drm, "No supported MEM class\n"); + goto done; + } + + cli->mem = &mems[ret]; + return 0; done: if (ret) nouveau_cli_fini(cli); @@ -433,6 +551,10 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) dev->dev_private = drm; drm->dev = dev; + ret = nouveau_cli_init(drm, "DRM-master", &drm->master); + if (ret) + return ret; + ret = nouveau_cli_init(drm, "DRM", &drm->client); if (ret) return ret; @@ -456,21 +578,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) nouveau_vga_init(drm); - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - if (!nvxx_device(&drm->client.device)->mmu) { - ret = -ENOSYS; - goto fail_device; - } - - ret = 
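nouveau_cli_init above probes the mems/mmus/vmms tables newest-first and keeps the index of the first class the device actually exposes. A self-contained model of that probing; supported() fakes the sclass query that the real nvif_mclass() performs against the object, and the hex class values are arbitrary:

```c
#include <stdio.h>

struct mclass { int oclass; };

static int supported(int oclass)	/* pretend only older classes exist */
{
	return oclass <= 0x50;
}

static int pick_class(const struct mclass *list)
{
	int i;

	for (i = 0; list[i].oclass; i++)	/* table ends with {} */
		if (supported(list[i].oclass))
			return i;	/* index of newest supported class */
	return -1;			/* nothing supported */
}

int main(void)
{
	const struct mclass vmms[] = { { 0x90 }, { 0x50 }, { 0x04 }, { 0 } };
	printf("picked index %d\n", pick_class(vmms));	/* -> 1 (0x50) */
	return 0;
}
```

Ordering the tables newest-first means the driver automatically takes the most capable interface the hardware offers and degrades gracefully on older chips.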
nvkm_vm_new(nvxx_device(&drm->client.device), - 0, (1ULL << 40), 0x1000, NULL, - &drm->client.vm); - if (ret) - goto fail_device; - - nvxx_client(&drm->client.base)->vm = drm->client.vm; - } - ret = nouveau_ttm_init(drm); if (ret) goto fail_ttm; @@ -516,8 +623,8 @@ fail_bios: nouveau_ttm_fini(drm); fail_ttm: nouveau_vga_fini(drm); -fail_device: nouveau_cli_fini(&drm->client); + nouveau_cli_fini(&drm->master); kfree(drm); return ret; } @@ -550,6 +657,7 @@ nouveau_drm_unload(struct drm_device *dev) if (drm->hdmi_device) pci_dev_put(drm->hdmi_device); nouveau_cli_fini(&drm->client); + nouveau_cli_fini(&drm->master); kfree(drm); } @@ -618,7 +726,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime) } NV_DEBUG(drm, "suspending object tree...\n"); - ret = nvif_client_suspend(&drm->client.base); + ret = nvif_client_suspend(&drm->master.base); if (ret) goto fail_client; @@ -642,7 +750,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime) struct nouveau_drm *drm = nouveau_drm(dev); NV_DEBUG(drm, "resuming object tree...\n"); - nvif_client_resume(&drm->client.base); + nvif_client_resume(&drm->master.base); NV_DEBUG(drm, "resuming fence...\n"); if (drm->fence && nouveau_fence(drm)->resume) @@ -850,15 +958,6 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) cli->base.super = false; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nvkm_vm_new(nvxx_device(&drm->client.device), 0, - (1ULL << 40), 0x1000, NULL, &cli->vm); - if (ret) - goto done; - - nvxx_client(&cli->base)->vm = cli->vm; - } - fpriv->driver_priv = cli; mutex_lock(&drm->client.mutex); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 822fe1d4d35e..e86b8220a4bb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -5,7 +5,7 @@ #define DRIVER_EMAIL "nouveau@lists.freedesktop.org" #define DRIVER_NAME "nouveau" -#define DRIVER_DESC "nVidia Riva/TNT/GeForce/Quadro/Tesla" +#define DRIVER_DESC "nVidia Riva/TNT/GeForce/Quadro/Tesla/Tegra K1+" #define DRIVER_DATE "20120801" #define DRIVER_MAJOR 1 @@ -42,6 +42,8 @@ #include <nvif/client.h> #include <nvif/device.h> #include <nvif/ioctl.h> +#include <nvif/mmu.h> +#include <nvif/vmm.h> #include <drm/drmP.h> @@ -61,6 +63,7 @@ struct platform_device; #include "nouveau_fence.h" #include "nouveau_bios.h" +#include "nouveau_vmm.h" struct nouveau_drm_tile { struct nouveau_fence *fence; @@ -86,19 +89,37 @@ enum nouveau_drm_handle { struct nouveau_cli { struct nvif_client base; - struct drm_device *dev; + struct nouveau_drm *drm; struct mutex mutex; struct nvif_device device; + struct nvif_mmu mmu; + struct nouveau_vmm vmm; + const struct nvif_mclass *mem; - struct nvkm_vm *vm; /*XXX*/ struct list_head head; void *abi16; struct list_head objects; struct list_head notifys; char name[32]; + + struct work_struct work; + struct list_head worker; + struct mutex lock; }; +struct nouveau_cli_work { + void (*func)(struct nouveau_cli_work *); + struct nouveau_cli *cli; + struct list_head head; + + struct dma_fence *fence; + struct dma_fence_cb cb; +}; + +void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *, + struct nouveau_cli_work *); + static inline struct nouveau_cli * nouveau_cli(struct drm_file *fpriv) { @@ -109,6 +130,7 @@ nouveau_cli(struct drm_file *fpriv) #include <nvif/device.h> struct nouveau_drm { + struct nouveau_cli master; struct nouveau_cli client; struct drm_device *dev; @@ -133,6 +155,9 @@ struct nouveau_drm { struct nouveau_channel 
*chan; struct nvif_object copy; int mtrr; + int type_vram; + int type_host; + int type_ncoh; } ttm; /* GEM interface support */ @@ -204,7 +229,7 @@ void nouveau_drm_device_remove(struct drm_device *dev); #define NV_PRINTK(l,c,f,a...) do { \ struct nouveau_cli *_cli = (c); \ - dev_##l(_cli->dev->dev, "%s: "f, _cli->name, ##a); \ + dev_##l(_cli->drm->dev->dev, "%s: "f, _cli->name, ##a); \ } while(0) #define NV_FATAL(drm,f,a...) NV_PRINTK(crit, &(drm)->client, f, ##a) #define NV_ERROR(drm,f,a...) NV_PRINTK(err, &(drm)->client, f, ##a) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index f7707849bb53..c533d8e04afc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -48,6 +48,7 @@ #include "nouveau_bo.h" #include "nouveau_fbcon.h" #include "nouveau_chan.h" +#include "nouveau_vmm.h" #include "nouveau_crtc.h" @@ -223,7 +224,7 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags; drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; } @@ -233,7 +234,7 @@ void nouveau_fbcon_accel_restore(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags; } } @@ -245,7 +246,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev) struct nouveau_fbdev *fbcon = drm->fbcon; if (fbcon && drm->channel) { console_lock(); - fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; console_unlock(); nouveau_channel_idle(drm->channel); nvif_object_fini(&fbcon->twod); @@ -347,7 +349,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, chan = nouveau_nofbaccel ? 
NULL : drm->channel; if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(nvbo, drm->client.vm, &fb->vma); + ret = nouveau_vma_new(nvbo, &drm->client.vmm, &fb->vma); if (ret) { NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); chan = NULL; @@ -401,7 +403,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, out_unlock: if (chan) - nouveau_bo_vma_del(fb->nvbo, &fb->vma); + nouveau_vma_del(&fb->vma); nouveau_bo_unmap(fb->nvbo); out_unpin: nouveau_bo_unpin(fb->nvbo); @@ -428,7 +430,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_fini(&fbcon->helper); if (nouveau_fb->nvbo) { - nouveau_bo_vma_del(nouveau_fb->nvbo, &nouveau_fb->vma); + nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); drm_framebuffer_unreference(&nouveau_fb->base); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4..503fa94dc06d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -199,62 +199,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha WARN_ON(ret); } -struct nouveau_fence_work { - struct work_struct work; - struct dma_fence_cb cb; - void (*func)(void *); - void *data; -}; - -static void -nouveau_fence_work_handler(struct work_struct *kwork) -{ - struct nouveau_fence_work *work = container_of(kwork, typeof(*work), work); - work->func(work->data); - kfree(work); -} - -static void nouveau_fence_work_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -{ - struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb); - - schedule_work(&work->work); -} - -void -nouveau_fence_work(struct dma_fence *fence, - void (*func)(void *), void *data) -{ - struct nouveau_fence_work *work; - - if (dma_fence_is_signaled(fence)) - goto err; - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - /* - * this might not be a nouveau fence any more, - * so force a lazy wait here - */ - WARN_ON(nouveau_fence_wait((struct nouveau_fence *)fence, - true, false)); - goto err; - } - - INIT_WORK(&work->work, nouveau_fence_work_handler); - work->func = func; - work->data = data; - - if (dma_fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0) - goto err_free; - return; - -err_free: - kfree(work); -err: - func(data); -} - int nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) { @@ -474,8 +418,6 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, if (!fence) return -ENOMEM; - fence->sysmem = sysmem; - ret = nouveau_fence_emit(fence, chan); if (ret) nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index d5e58a38f160..c36031aa013e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -12,8 +12,6 @@ struct nouveau_fence { struct list_head head; - bool sysmem; - struct nouveau_channel __rcu *channel; unsigned long timeout; }; @@ -24,7 +22,6 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); -void nouveau_fence_work(struct dma_fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); @@ -90,14 +87,12 @@ int 
nouveau_flip_complete(struct nvif_notify *); struct nv84_fence_chan { struct nouveau_fence_chan base; - struct nvkm_vma vma; - struct nvkm_vma vma_gart; + struct nouveau_vma *vma; }; struct nv84_fence_priv { struct nouveau_fence_priv base; struct nouveau_bo *bo; - struct nouveau_bo *bo_gart; u32 *suspend; struct mutex mutex; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2170534101ca..efc89aaef66a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -31,6 +31,10 @@ #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_vmm.h" + +#include <nvif/class.h> void nouveau_gem_object_del(struct drm_gem_object *gem) @@ -64,66 +68,61 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_vma *vma; struct device *dev = drm->dev->dev; + struct nouveau_vma *vma; int ret; - if (!cli->vm) + if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return ret; - vma = nouveau_bo_vma_find(nvbo, cli->vm); - if (!vma) { - vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (!vma) { - ret = -ENOMEM; - goto out; - } - - ret = pm_runtime_get_sync(dev); - if (ret < 0 && ret != -EACCES) { - kfree(vma); - goto out; - } - - ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) - kfree(vma); - - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } else { - vma->refcount++; - } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_vma_new(nvbo, &cli->vmm, &vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); out: ttm_bo_unreserve(&nvbo->bo); return ret; } +struct nouveau_gem_object_unmap { + struct nouveau_cli_work work; + struct nouveau_vma *vma; +}; + static void -nouveau_gem_object_delete(void *data) +nouveau_gem_object_delete(struct nouveau_vma *vma) { - struct nvkm_vma *vma = data; - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - kfree(vma); + nouveau_vma_del(&vma); } static void -nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma) +nouveau_gem_object_delete_work(struct nouveau_cli_work *w) +{ + struct nouveau_gem_object_unmap *work = + container_of(w, typeof(*work), work); + nouveau_gem_object_delete(work->vma); + kfree(work); +} + +static void +nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) { const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM; struct reservation_object *resv = nvbo->bo.resv; struct reservation_object_list *fobj; + struct nouveau_gem_object_unmap *work; struct dma_fence *fence = NULL; fobj = reservation_object_get_list(resv); - list_del(&vma->head); + list_del_init(&vma->head); if (fobj && fobj->shared_count > 1) ttm_bo_wait(&nvbo->bo, false, false); @@ -133,14 +132,20 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma) else fence = reservation_object_get_excl(nvbo->bo.resv); - if (fence && mapped) { - nouveau_fence_work(fence, nouveau_gem_object_delete, vma); - } else { - if (mapped) - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - kfree(vma); + if (!fence || !mapped) { + nouveau_gem_object_delete(vma); + return; + } + + if (!(work = kmalloc(sizeof(*work), GFP_KERNEL))) { + WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0); + 
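nouveau_gem_object_unmap above defers freeing a still-mapped VMA until the buffer's fence signals, with two fallbacks: free immediately when nothing can still touch it, and fall back to a bounded wait when the work item cannot be allocated. The same decision tree as a sketch; unmap_vma_deferred is an illustrative wrapper name, while the helpers, struct and fields are the ones this patch introduces:

```c
static void unmap_vma_deferred(struct nouveau_vma *vma,
			       struct dma_fence *fence, bool mapped)
{
	struct nouveau_gem_object_unmap *work;

	if (!fence || !mapped) {	/* safe to free right away */
		nouveau_gem_object_delete(vma);
		return;
	}

	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (!work) {			/* no memory: bounded wait instead */
		WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0);
		nouveau_gem_object_delete(vma);
		return;
	}

	work->work.func = nouveau_gem_object_delete_work;
	work->vma = vma;
	nouveau_cli_work_queue(vma->vmm->cli, fence, &work->work);
}
```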
nouveau_gem_object_delete(vma); + return; } + + work->work.func = nouveau_gem_object_delete_work; + work->vma = vma; + nouveau_cli_work_queue(vma->vmm->cli, fence, &work->work); } void @@ -150,19 +155,19 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nvkm_vma *vma; + struct nouveau_vma *vma; int ret; - if (!cli->vm) + if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) return; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return; - vma = nouveau_bo_vma_find(nvbo, cli->vm); + vma = nouveau_vma_find(nvbo, &cli->vmm); if (vma) { - if (--vma->refcount == 0) { + if (--vma->refs == 0) { ret = pm_runtime_get_sync(dev); if (!WARN_ON(ret < 0 && ret != -EACCES)) { nouveau_gem_object_unmap(nvbo, vma); @@ -179,7 +184,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, uint32_t tile_mode, uint32_t tile_flags, struct nouveau_bo **pnvbo) { - struct nouveau_drm *drm = nouveau_drm(cli->dev); + struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; u32 flags = 0; int ret; @@ -227,7 +232,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); - struct nvkm_vma *vma; + struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) rep->domain = nvbo->valid_domains; @@ -236,18 +241,25 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; rep->offset = nvbo->bo.offset; - if (cli->vm) { - vma = nouveau_bo_vma_find(nvbo, cli->vm); + if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + vma = nouveau_vma_find(nvbo, &cli->vmm); if (!vma) return -EINVAL; - rep->offset = vma->offset; + rep->offset = vma->addr; } rep->size = nvbo->bo.mem.num_pages << PAGE_SHIFT; rep->map_handle = drm_vma_node_offset_addr(&nvbo->bo.vma_node); - rep->tile_mode = nvbo->tile_mode; - rep->tile_flags = nvbo->tile_flags; + rep->tile_mode = nvbo->mode; + rep->tile_flags = nvbo->contig ? 
0 : NOUVEAU_GEM_TILE_NONCONTIG; + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) + rep->tile_flags |= nvbo->kind << 8; + else + if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) + rep->tile_flags |= nvbo->kind << 8 | nvbo->comp << 16; + else + rep->tile_flags |= nvbo->zeta; return 0; } @@ -255,18 +267,11 @@ int nouveau_gem_ioctl_new(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_cli *cli = nouveau_cli(file_priv); - struct nvkm_fb *fb = nvxx_fb(&drm->client.device); struct drm_nouveau_gem_new *req = data; struct nouveau_bo *nvbo = NULL; int ret = 0; - if (!nvkm_fb_memtype_valid(fb, req->info.tile_flags)) { - NV_PRINTK(err, cli, "bad page flags: 0x%08x\n", req->info.tile_flags); - return -EINVAL; - } - ret = nouveau_gem_new(cli, req->info.size, req->align, req->info.domain, req->info.tile_mode, req->info.tile_flags, &nvbo); @@ -791,7 +796,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, bo[push[i].bo_index].user_priv; uint32_t cmd; - cmd = chan->push.vma.offset + ((chan->dma.cur + 2) << 2); + cmd = chan->push.addr + ((chan->dma.cur + 2) << 2); cmd |= 0x20000000; if (unlikely(cmd != req->suffix0)) { if (!nvbo->kmap.virtual) { @@ -843,7 +848,7 @@ out_next: req->suffix1 = 0x00000000; } else { req->suffix0 = 0x20000000 | - (chan->push.vma.offset + ((chan->dma.cur + 2) << 2)); + (chan->push.addr + ((chan->dma.cur + 2) << 2)); req->suffix1 = 0x00000000; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h index 8fa6ed9ddd3a..d39f845dda87 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.h +++ b/drivers/gpu/drm/nouveau/nouveau_gem.h @@ -6,9 +6,6 @@ #include "nouveau_drv.h" #include "nouveau_bo.h" -#define nouveau_bo_tile_layout(nvbo) \ - ((nvbo)->tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) - static inline struct nouveau_bo * nouveau_gem_object(struct drm_gem_object *gem) { diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c new file mode 100644 index 000000000000..589a9621db76 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -0,0 +1,198 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "nouveau_mem.h" +#include "nouveau_drv.h" +#include "nouveau_bo.h" + +#include <drm/ttm/ttm_bo_driver.h> + +#include <nvif/class.h> +#include <nvif/if000a.h> +#include <nvif/if500b.h> +#include <nvif/if500d.h> +#include <nvif/if900b.h> +#include <nvif/if900d.h> + +int +nouveau_mem_map(struct nouveau_mem *mem, + struct nvif_vmm *vmm, struct nvif_vma *vma) +{ + union { + struct nv50_vmm_map_v0 nv50; + struct gf100_vmm_map_v0 gf100; + } args; + u32 argc = 0; + bool super; + int ret; + + switch (vmm->object.oclass) { + case NVIF_CLASS_VMM_NV04: + break; + case NVIF_CLASS_VMM_NV50: + args.nv50.version = 0; + args.nv50.ro = 0; + args.nv50.priv = 0; + args.nv50.kind = mem->kind; + args.nv50.comp = mem->comp; + argc = sizeof(args.nv50); + break; + case NVIF_CLASS_VMM_GF100: + case NVIF_CLASS_VMM_GM200: + case NVIF_CLASS_VMM_GP100: + args.gf100.version = 0; + if (mem->mem.type & NVIF_MEM_VRAM) + args.gf100.vol = 0; + else + args.gf100.vol = 1; + args.gf100.ro = 0; + args.gf100.priv = 0; + args.gf100.kind = mem->kind; + argc = sizeof(args.gf100); + break; + default: + WARN_ON(1); + return -ENOSYS; + } + + super = vmm->object.client->super; + vmm->object.client->super = true; + ret = nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc, + &mem->mem, 0); + vmm->object.client->super = super; + return ret; +} + +void +nouveau_mem_fini(struct nouveau_mem *mem) +{ + nvif_vmm_put(&mem->cli->drm->client.vmm.vmm, &mem->vma[1]); + nvif_vmm_put(&mem->cli->drm->client.vmm.vmm, &mem->vma[0]); + mutex_lock(&mem->cli->drm->master.lock); + nvif_mem_fini(&mem->mem); + mutex_unlock(&mem->cli->drm->master.lock); +} + +int +nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + struct nouveau_cli *cli = mem->cli; + struct nouveau_drm *drm = cli->drm; + struct nvif_mmu *mmu = &cli->mmu; + struct nvif_mem_ram_v0 args = {}; + bool super = cli->base.super; + u8 type; + int ret; + + if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + type = drm->ttm.type_ncoh; + else + type = drm->ttm.type_host; + + if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) + mem->comp = mem->kind = 0; + if (mem->comp && !(mmu->type[type].type & NVIF_MEM_COMP)) { + if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100) + mem->kind = mmu->kind[mem->kind]; + mem->comp = 0; + } + + if (tt->ttm.sg) args.sgl = tt->ttm.sg->sgl; + else args.dma = tt->dma_address; + + mutex_lock(&drm->master.lock); + cli->base.super = true; + ret = nvif_mem_init_type(mmu, cli->mem->oclass, type, PAGE_SHIFT, + reg->num_pages << PAGE_SHIFT, + &args, sizeof(args), &mem->mem); + cli->base.super = super; + mutex_unlock(&drm->master.lock); + return ret; +} + +int +nouveau_mem_vram(struct ttm_mem_reg *reg, bool contig, u8 page) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + struct nouveau_cli *cli = mem->cli; + struct nouveau_drm *drm = cli->drm; + struct nvif_mmu *mmu = &cli->mmu; + bool super = cli->base.super; + u64 size = ALIGN(reg->num_pages << PAGE_SHIFT, 1 << page); + int ret; + + mutex_lock(&drm->master.lock); + cli->base.super = true; + switch (cli->mem->oclass) { + case NVIF_CLASS_MEM_GF100: + ret = nvif_mem_init_type(mmu, cli->mem->oclass, + drm->ttm.type_vram, page, size, + &(struct gf100_mem_v0) { + .contig = contig, + }, sizeof(struct gf100_mem_v0), + &mem->mem); + break; + case NVIF_CLASS_MEM_NV50: + ret = nvif_mem_init_type(mmu, cli->mem->oclass, + drm->ttm.type_vram, page, size, + &(struct nv50_mem_v0) { + .bankswz = mmu->kind[mem->kind] == 2, + .contig = contig, + }, 
sizeof(struct nv50_mem_v0), + &mem->mem); + break; + default: + ret = -ENOSYS; + WARN_ON(1); + break; + } + cli->base.super = super; + mutex_unlock(&drm->master.lock); + + reg->start = mem->mem.addr >> PAGE_SHIFT; + return ret; +} + +void +nouveau_mem_del(struct ttm_mem_reg *reg) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + nouveau_mem_fini(mem); + kfree(reg->mm_node); + reg->mm_node = NULL; +} + +int +nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp, + struct ttm_mem_reg *reg) +{ + struct nouveau_mem *mem; + + if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL))) + return -ENOMEM; + mem->cli = cli; + mem->kind = kind; + mem->comp = comp; + + reg->mm_node = mem; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h b/drivers/gpu/drm/nouveau/nouveau_mem.h new file mode 100644 index 000000000000..f6d039e73812 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_mem.h @@ -0,0 +1,30 @@ +#ifndef __NOUVEAU_MEM_H__ +#define __NOUVEAU_MEM_H__ +#include <drm/ttm/ttm_bo_api.h> +struct ttm_dma_tt; + +#include <nvif/mem.h> +#include <nvif/vmm.h> + +static inline struct nouveau_mem * +nouveau_mem(struct ttm_mem_reg *reg) +{ + return reg->mm_node; +} + +struct nouveau_mem { + struct nouveau_cli *cli; + u8 kind; + u8 comp; + struct nvif_mem mem; + struct nvif_vma vma[2]; +}; + +int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp, + struct ttm_mem_reg *); +void nouveau_mem_del(struct ttm_mem_reg *); +int nouveau_mem_vram(struct ttm_mem_reg *, bool contig, u8 page); +int nouveau_mem_host(struct ttm_mem_reg *, struct ttm_dma_tt *); +void nouveau_mem_fini(struct nouveau_mem *); +int nouveau_mem_map(struct nouveau_mem *, struct nvif_vmm *, struct nvif_vma *); +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index b7ab268f7d6f..941bf33bd249 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -2,6 +2,7 @@ #include <linux/slab.h> #include "nouveau_drv.h" +#include "nouveau_mem.h" #include "nouveau_ttm.h" struct nouveau_sgdma_be { @@ -9,7 +10,7 @@ struct nouveau_sgdma_be { * nouve_bo.c works properly, otherwise have to move them here */ struct ttm_dma_tt ttm; - struct nvkm_mem *node; + struct nouveau_mem *mem; }; static void @@ -27,19 +28,20 @@ static int nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - struct nvkm_mem *node = reg->mm_node; - - if (ttm->sg) { - node->sg = ttm->sg; - node->pages = NULL; - } else { - node->sg = NULL; - node->pages = nvbe->ttm.dma_address; + struct nouveau_mem *mem = nouveau_mem(reg); + int ret; + + ret = nouveau_mem_host(reg, &nvbe->ttm); + if (ret) + return ret; + + ret = nouveau_mem_map(mem, &mem->cli->vmm.vmm, &mem->vma[0]); + if (ret) { + nouveau_mem_fini(mem); + return ret; } - node->size = (reg->num_pages << PAGE_SHIFT) >> 12; - nvkm_vm_map(&node->vma[0], node); - nvbe->node = node; + nvbe->mem = mem; return 0; } @@ -47,7 +49,7 @@ static int nv04_sgdma_unbind(struct ttm_tt *ttm) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - nvkm_vm_unmap(&nvbe->node->vma[0]); + nouveau_mem_fini(nvbe->mem); return 0; } @@ -61,30 +63,20 @@ static int nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - struct nvkm_mem *node = reg->mm_node; - - /* noop: bound in move_notify() */ - if (ttm->sg) { - node->sg = ttm->sg; - node->pages = NULL; - } else { - node->sg = NULL; - node->pages = 
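The new nouveau_mem wrapper pairs an nvif_mem with up to two VMAs, and the reworked TTM managers below consume it through a new/backing/del lifecycle. A sketch of that lifecycle for the VRAM case; alloc_vram_reg is an illustrative name and error handling is abbreviated:

```c
static int alloc_vram_reg(struct nouveau_cli *cli, u8 kind, u8 comp,
			  bool contig, u8 page, struct ttm_mem_reg *reg)
{
	int ret = nouveau_mem_new(cli, kind, comp, reg); /* sets reg->mm_node */
	if (ret)
		return ret;

	ret = nouveau_mem_vram(reg, contig, page);	/* nvif_mem backing */
	if (ret)
		nouveau_mem_del(reg);	/* nouveau_mem_fini() + kfree()    */
	return ret;
}
```

The real manager additionally converts -ENOSPC into a NULL mm_node and a zero return, which is TTM's convention for "no space here, try another placement".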
nvbe->ttm.dma_address; - } - node->size = (reg->num_pages << PAGE_SHIFT) >> 12; - return 0; -} + struct nouveau_mem *mem = nouveau_mem(reg); + int ret; -static int -nv50_sgdma_unbind(struct ttm_tt *ttm) -{ - /* noop: unbound in move_notify() */ + ret = nouveau_mem_host(reg, &nvbe->ttm); + if (ret) + return ret; + + nvbe->mem = mem; return 0; } static struct ttm_backend_func nv50_sgdma_backend = { .bind = nv50_sgdma_bind, - .unbind = nv50_sgdma_unbind, + .unbind = nv04_sgdma_unbind, .destroy = nouveau_sgdma_destroy }; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index b0ad7fcefcf5..08b974b30482 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -23,53 +23,37 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include "nouveau_drv.h" -#include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_ttm.h" #include <drm/drm_legacy.h> #include <core/tegra.h> static int -nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) +nouveau_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_fb *fb = nvxx_fb(&drm->client.device); - man->priv = fb; return 0; } static int -nouveau_vram_manager_fini(struct ttm_mem_type_manager *man) +nouveau_manager_fini(struct ttm_mem_type_manager *man) { - man->priv = NULL; return 0; } -static inline void -nvkm_mem_node_cleanup(struct nvkm_mem *node) +static void +nouveau_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { - if (node->vma[0].node) { - nvkm_vm_unmap(&node->vma[0]); - nvkm_vm_put(&node->vma[0]); - } - - if (node->vma[1].node) { - nvkm_vm_unmap(&node->vma[1]); - nvkm_vm_put(&node->vma[1]); - } + nouveau_mem_del(reg); } static void -nouveau_vram_manager_del(struct ttm_mem_type_manager *man, - struct ttm_mem_reg *reg) +nouveau_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram; - nvkm_mem_node_cleanup(reg->mm_node); - ram->func->put(ram, (struct nvkm_mem **)®->mm_node); } static int @@ -78,192 +62,105 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram; struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_mem *node; - u32 size_nc = 0; + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; int ret; if (drm->client.device.info.ram_size == 0) return -ENOMEM; - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) - size_nc = 1 << nvbo->page_shift; + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - ret = ram->func->get(ram, reg->num_pages << PAGE_SHIFT, - reg->page_alignment << PAGE_SHIFT, size_nc, - (nvbo->tile_flags >> 8) & 0x3ff, &node); + ret = nouveau_mem_vram(reg, nvbo->contig, nvbo->page); if (ret) { - reg->mm_node = NULL; - return (ret == -ENOSPC) ? 
0 : ret; + nouveau_mem_del(reg); + if (ret == -ENOSPC) { + reg->mm_node = NULL; + return 0; + } + return ret; } - node->page_shift = nvbo->page_shift; - - reg->mm_node = node; - reg->start = node->offset >> PAGE_SHIFT; return 0; } const struct ttm_mem_type_manager_func nouveau_vram_manager = { - .init = nouveau_vram_manager_init, - .takedown = nouveau_vram_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nouveau_vram_manager_new, - .put_node = nouveau_vram_manager_del, + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug, }; static int -nouveau_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) -{ - return 0; -} - -static int -nouveau_gart_manager_fini(struct ttm_mem_type_manager *man) -{ - return 0; -} - -static void -nouveau_gart_manager_del(struct ttm_mem_type_manager *man, - struct ttm_mem_reg *reg) -{ - nvkm_mem_node_cleanup(reg->mm_node); - kfree(reg->mm_node); - reg->mm_node = NULL; -} - -static int nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_mem *node; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; + int ret; - node->page_shift = 12; - - switch (drm->client.device.info.family) { - case NV_DEVICE_INFO_V0_TNT: - case NV_DEVICE_INFO_V0_CELSIUS: - case NV_DEVICE_INFO_V0_KELVIN: - case NV_DEVICE_INFO_V0_RANKINE: - case NV_DEVICE_INFO_V0_CURIE: - break; - case NV_DEVICE_INFO_V0_TESLA: - if (drm->client.device.info.chipset != 0x50) - node->memtype = (nvbo->tile_flags & 0x7f00) >> 8; - break; - case NV_DEVICE_INFO_V0_FERMI: - case NV_DEVICE_INFO_V0_KEPLER: - case NV_DEVICE_INFO_V0_MAXWELL: - case NV_DEVICE_INFO_V0_PASCAL: - node->memtype = (nvbo->tile_flags & 0xff00) >> 8; - break; - default: - NV_WARN(drm, "%s: unhandled family type %x\n", __func__, - drm->client.device.info.family); - break; - } + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - reg->mm_node = node; - reg->start = 0; + reg->start = 0; return 0; } -static void -nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, - struct drm_printer *printer) -{ -} - const struct ttm_mem_type_manager_func nouveau_gart_manager = { - .init = nouveau_gart_manager_init, - .takedown = nouveau_gart_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nouveau_gart_manager_new, - .put_node = nouveau_gart_manager_del, - .debug = nouveau_gart_manager_debug + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug }; -/*XXX*/ -#include <subdev/mmu/nv04.h> -static int -nv04_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) -{ - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_mmu *mmu = nvxx_mmu(&drm->client.device); - struct nv04_mmu *priv = (void *)mmu; - struct nvkm_vm *vm = NULL; - nvkm_vm_ref(priv->vm, &vm, NULL); - man->priv = vm; - return 0; -} - -static int -nv04_gart_manager_fini(struct ttm_mem_type_manager *man) -{ - struct nvkm_vm *vm = man->priv; - nvkm_vm_ref(NULL, &vm, NULL); - man->priv = NULL; - return 0; -} - -static void -nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) -{ - struct nvkm_mem *node = reg->mm_node; - if (node->vma[0].node) - 
nvkm_vm_put(&node->vma[0]); - kfree(reg->mm_node); - reg->mm_node = NULL; -} - static int nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nvkm_mem *node; + struct nouveau_bo *nvbo = nouveau_bo(bo); + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; int ret; - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - node->page_shift = 12; + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - ret = nvkm_vm_get(man->priv, reg->num_pages << 12, node->page_shift, - NV_MEM_ACCESS_RW, &node->vma[0]); + ret = nvif_vmm_get(&mem->cli->vmm.vmm, PTES, false, 12, 0, + reg->num_pages << PAGE_SHIFT, &mem->vma[0]); if (ret) { - kfree(node); + nouveau_mem_del(reg); + if (ret == -ENOSPC) { + reg->mm_node = NULL; + return 0; + } return ret; } - reg->mm_node = node; - reg->start = node->vma[0].offset >> PAGE_SHIFT; + reg->start = mem->vma[0].addr >> PAGE_SHIFT; return 0; } -static void -nv04_gart_manager_debug(struct ttm_mem_type_manager *man, - struct drm_printer *printer) -{ -} - const struct ttm_mem_type_manager_func nv04_gart_manager = { - .init = nv04_gart_manager_init, - .takedown = nv04_gart_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nv04_gart_manager_new, - .put_node = nv04_gart_manager_del, - .debug = nv04_gart_manager_debug + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug }; int @@ -343,44 +240,43 @@ nouveau_ttm_init(struct nouveau_drm *drm) { struct nvkm_device *device = nvxx_device(&drm->client.device); struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; struct drm_device *dev = drm->dev; - u8 bits; - int ret; + int typei, ret; - if (pci && pci->agp.bridge) { - drm->agp.bridge = pci->agp.bridge; - drm->agp.base = pci->agp.base; - drm->agp.size = pci->agp.size; - drm->agp.cma = pci->agp.cma; - } + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | + NVIF_MEM_COHERENT); + if (typei < 0) + return -ENOSYS; - bits = nvxx_mmu(&drm->client.device)->dma_bits; - if (nvxx_device(&drm->client.device)->func->pci) { - if (drm->agp.bridge) - bits = 32; - } else if (device->func->tegra) { - struct nvkm_device_tegra *tegra = device->func->tegra(device); + drm->ttm.type_host = typei; - /* - * If the platform can use a IOMMU, then the addressable DMA - * space is constrained by the IOMMU bit - */ - if (tegra->func->iommu_bit) - bits = min(bits, tegra->func->iommu_bit); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + if (typei < 0) + return -ENOSYS; - } + drm->ttm.type_ncoh = typei; - ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret && bits != 32) { - bits = 32; - ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); + if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && + drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + typei = nvif_mmu_type(mmu, NVIF_MEM_VRAM | NVIF_MEM_MAPPABLE | + NVIF_MEM_KIND | + NVIF_MEM_COMP | + NVIF_MEM_DISP); + if (typei < 0) + return -ENOSYS; + + drm->ttm.type_vram = typei; + } else { + drm->ttm.type_vram = -1; } - if (ret) - return ret; - ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret) - dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32)); + if (pci && pci->agp.bridge) { + drm->agp.bridge = pci->agp.bridge; + drm->agp.base = pci->agp.base; + drm->agp.size = pci->agp.size; + drm->agp.cma = 
pci->agp.cma; + } ret = nouveau_ttm_global_init(drm); if (ret) @@ -391,7 +287,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) &nouveau_bo_driver, dev->anon_inode->i_mapping, DRM_FILE_PAGE_OFFSET, - bits <= 32 ? true : false); + drm->client.mmu.dmabits <= 32 ? true : false); if (ret) { NV_ERROR(drm, "error initialising bo driver, %d\n", ret); return ret; @@ -415,7 +311,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) /* GART init */ if (!drm->agp.bridge) { - drm->gem.gart_available = nvxx_mmu(&drm->client.device)->limit; + drm->gem.gart_available = drm->client.vmm.vmm.limit; } else { drm->gem.gart_available = drm->agp.size; } diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c new file mode 100644 index 000000000000..9e2628dd8e4d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -0,0 +1,135 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
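Editorial note on the nouveau_ttm_init() rework above: instead of hardcoding DMA placement, the driver now asks the MMU for a memory-type index whose capability flags are a superset of what it needs (NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | NVIF_MEM_COHERENT, and so on), and nvif_mem_init() further down in this diff reuses the same superset-matching loop. A minimal, self-contained sketch of that idiom follows; the flag values and the type table are illustrative stand-ins, not the real NVIF bits.

	/* Pick the first memory type whose capability bits cover everything
	 * the caller asked for, the same test nvif_mem_init() applies with
	 * "(mmu->type[i].type & type) == type".  Flags invented for the
	 * sketch. */
	#include <stdio.h>

	#define MEM_VRAM     0x01
	#define MEM_HOST     0x02
	#define MEM_MAPPABLE 0x04
	#define MEM_COHERENT 0x08

	static int
	mem_type_find(const unsigned char *types, int nr, unsigned char want)
	{
		int i;
		for (i = 0; i < nr; i++) {
			if ((types[i] & want) == want)
				return i;	/* first superset match wins */
		}
		return -1;	/* no match: the callers above fail with -ENOSYS */
	}

	int
	main(void)
	{
		const unsigned char types[] = {
			MEM_VRAM | MEM_MAPPABLE,
			MEM_HOST | MEM_MAPPABLE | MEM_COHERENT,
			MEM_HOST | MEM_MAPPABLE,
		};
		/* host, mappable and coherent resolves to index 1 here */
		printf("%d\n", mem_type_find(types, 3,
		       MEM_HOST | MEM_MAPPABLE | MEM_COHERENT));
		return 0;
	}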
+ */ +#include "nouveau_vmm.h" +#include "nouveau_drv.h" +#include "nouveau_bo.h" +#include "nouveau_mem.h" + +void +nouveau_vma_unmap(struct nouveau_vma *vma) +{ + if (vma->mem) { + nvif_vmm_unmap(&vma->vmm->vmm, vma->addr); + vma->mem = NULL; + } +} + +int +nouveau_vma_map(struct nouveau_vma *vma, struct nouveau_mem *mem) +{ + struct nvif_vma tmp = { .addr = vma->addr }; + int ret = nouveau_mem_map(mem, &vma->vmm->vmm, &tmp); + if (ret) + return ret; + vma->mem = mem; + return 0; +} + +struct nouveau_vma * +nouveau_vma_find(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm) +{ + struct nouveau_vma *vma; + + list_for_each_entry(vma, &nvbo->vma_list, head) { + if (vma->vmm == vmm) + return vma; + } + + return NULL; +} + +void +nouveau_vma_del(struct nouveau_vma **pvma) +{ + struct nouveau_vma *vma = *pvma; + if (vma && --vma->refs <= 0) { + if (likely(vma->addr != ~0ULL)) { + struct nvif_vma tmp = { .addr = vma->addr, .size = 1 }; + nvif_vmm_put(&vma->vmm->vmm, &tmp); + } + list_del(&vma->head); + *pvma = NULL; + kfree(*pvma); + } +} + +int +nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, + struct nouveau_vma **pvma) +{ + struct nouveau_mem *mem = nouveau_mem(&nvbo->bo.mem); + struct nouveau_vma *vma; + struct nvif_vma tmp; + int ret; + + if ((vma = *pvma = nouveau_vma_find(nvbo, vmm))) { + vma->refs++; + return 0; + } + + if (!(vma = *pvma = kmalloc(sizeof(*vma), GFP_KERNEL))) + return -ENOMEM; + vma->vmm = vmm; + vma->refs = 1; + vma->addr = ~0ULL; + vma->mem = NULL; + list_add_tail(&vma->head, &nvbo->vma_list); + + if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM && + mem->mem.page == nvbo->page) { + ret = nvif_vmm_get(&vmm->vmm, LAZY, false, mem->mem.page, 0, + mem->mem.size, &tmp); + if (ret) + goto done; + + vma->addr = tmp.addr; + ret = nouveau_vma_map(vma, mem); + } else { + ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, + mem->mem.size, &tmp); + vma->addr = tmp.addr; + } + +done: + if (ret) + nouveau_vma_del(pvma); + return ret; +} + +void +nouveau_vmm_fini(struct nouveau_vmm *vmm) +{ + nvif_vmm_fini(&vmm->vmm); + vmm->cli = NULL; +} + +int +nouveau_vmm_init(struct nouveau_cli *cli, s32 oclass, struct nouveau_vmm *vmm) +{ + int ret = nvif_vmm_init(&cli->mmu, oclass, PAGE_SIZE, 0, NULL, 0, + &vmm->vmm); + if (ret) + return ret; + + vmm->cli = cli; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h new file mode 100644 index 000000000000..5c31f43678d3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h @@ -0,0 +1,31 @@ +#ifndef __NOUVEAU_VMA_H__ +#define __NOUVEAU_VMA_H__ +#include <nvif/vmm.h> +struct nouveau_bo; +struct nouveau_mem; + +struct nouveau_vma { + struct nouveau_vmm *vmm; + int refs; + struct list_head head; + u64 addr; + + struct nouveau_mem *mem; +}; + +struct nouveau_vma *nouveau_vma_find(struct nouveau_bo *, struct nouveau_vmm *); +int nouveau_vma_new(struct nouveau_bo *, struct nouveau_vmm *, + struct nouveau_vma **); +void nouveau_vma_del(struct nouveau_vma **); +int nouveau_vma_map(struct nouveau_vma *, struct nouveau_mem *); +void nouveau_vma_unmap(struct nouveau_vma *); + +struct nouveau_vmm { + struct nouveau_cli *cli; + struct nvif_vmm vmm; + struct nvkm_vm *vm; +}; + +int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *); +void nouveau_vmm_fini(struct nouveau_vmm *); +#endif diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2dbf62a2ac41..92d46222c79d 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c 
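One thing worth flagging in nouveau_vma_del() above: the function clears *pvma before calling kfree(*pvma), so kfree() is always handed NULL (a no-op) and the nouveau_vma structure itself is never freed. Assuming the intent is simply to free the vma and leave the caller holding a NULL pointer, the tail of the function presumably wants to be ordered like this:

		list_del(&vma->head);
		kfree(*pvma);	/* free the vma while *pvma still points at it */
		*pvma = NULL;	/* then clear the caller's pointer */

Equivalently, kfree(vma) would do, since the local variable still holds the pointer.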
+++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -318,7 +318,7 @@ nv50_chan_create(struct nvif_device *device, struct nvif_object *disp, ret = nvif_object_init(disp, 0, oclass[0], data, size, &chan->user); if (ret == 0) - nvif_object_map(&chan->user); + nvif_object_map(&chan->user, NULL, 0); nvif_object_sclass_put(&sclass); return ret; } @@ -424,7 +424,7 @@ nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb) { struct nouveau_drm *drm = nouveau_drm(fb->base.dev); struct nv50_dmac_ctxdma *ctxdma; - const u8 kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8; + const u8 kind = fb->nvbo->kind; const u32 handle = 0xfb000000 | kind; struct { struct nv_dma_v0 base; @@ -510,6 +510,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, int ret; mutex_init(&dmac->lock); + INIT_LIST_HEAD(&dmac->ctxdma); dmac->ptr = dma_alloc_coherent(nvxx_device(device)->dev, PAGE_SIZE, &dmac->handle, GFP_KERNEL); @@ -556,7 +557,6 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - INIT_LIST_HEAD(&dmac->ctxdma); return ret; } @@ -847,7 +847,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, asyw->image.w = fb->base.width; asyw->image.h = fb->base.height; - asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8; + asyw->image.kind = fb->nvbo->kind; if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) asyw->interval = 0; @@ -857,9 +857,9 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, if (asyw->image.kind) { asyw->image.layout = 0; if (drm->client.device.info.chipset >= 0xc0) - asyw->image.block = fb->nvbo->tile_mode >> 4; + asyw->image.block = fb->nvbo->mode >> 4; else - asyw->image.block = fb->nvbo->tile_mode; + asyw->image.block = fb->nvbo->mode; asyw->image.pitch = (fb->base.pitches[0] / 4) << 4; } else { asyw->image.layout = 1; @@ -3265,11 +3265,14 @@ nv50_mstm = { void nv50_mstm_service(struct nv50_mstm *mstm) { - struct drm_dp_aux *aux = mstm->mgr.aux; + struct drm_dp_aux *aux = mstm ? 
mstm->mgr.aux : NULL; bool handled = true; int ret; u8 esi[8] = {}; + if (!aux) + return; + while (handled) { ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8); if (ret != 8) { diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 327dcd7901ed..facd18564e0d 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fbcon.h" +#include "nouveau_vmm.h" int nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) @@ -239,8 +240,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(fb->vma.offset)); - OUT_RING(chan, lower_32_bits(fb->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma->addr)); + OUT_RING(chan, lower_32_bits(fb->vma->addr)); BEGIN_NV04(chan, NvSub2D, 0x0230, 2); OUT_RING(chan, format); OUT_RING(chan, 1); @@ -248,8 +249,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(fb->vma.offset)); - OUT_RING(chan, lower_32_bits(fb->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma->addr)); + OUT_RING(chan, lower_32_bits(fb->vma->addr)); FIRE_RING(chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index bd7a8a1e4ad9..5f0c0c27d5dc 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fence.h" +#include "nouveau_vmm.h" #include "nv50_display.h" @@ -68,12 +69,7 @@ nv84_fence_emit(struct nouveau_fence *fence) { struct nouveau_channel *chan = fence->channel; struct nv84_fence_chan *fctx = chan->fence; - u64 addr = chan->chid * 16; - - if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + u64 addr = fctx->vma->addr + chan->chid * 16; return fctx->base.emit32(chan, addr, fence->base.seqno); } @@ -83,12 +79,7 @@ nv84_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *prev, struct nouveau_channel *chan) { struct nv84_fence_chan *fctx = chan->fence; - u64 addr = prev->chid * 16; - - if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + u64 addr = fctx->vma->addr + prev->chid * 16; return fctx->base.sync32(chan, addr, fence->base.seqno); } @@ -108,8 +99,7 @@ nv84_fence_context_del(struct nouveau_channel *chan) nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence); mutex_lock(&priv->mutex); - nouveau_bo_vma_del(priv->bo, &fctx->vma_gart); - nouveau_bo_vma_del(priv->bo, &fctx->vma); + nouveau_vma_del(&fctx->vma); mutex_unlock(&priv->mutex); nouveau_fence_context_del(&fctx->base); chan->fence = NULL; @@ -137,11 +127,7 @@ nv84_fence_context_new(struct nouveau_channel *chan) fctx->base.sequence = nv84_fence_read(chan); mutex_lock(&priv->mutex); - ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma); - if (ret == 0) { - ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm, - &fctx->vma_gart); - } + ret = nouveau_vma_new(priv->bo, &cli->vmm, &fctx->vma); mutex_unlock(&priv->mutex); if (ret) @@ -182,10 +168,6 @@ static void nv84_fence_destroy(struct nouveau_drm *drm) { struct nv84_fence_priv *priv = drm->fence; - nouveau_bo_unmap(priv->bo_gart); - if (priv->bo_gart) - 
nouveau_bo_unpin(priv->bo_gart); - nouveau_bo_ref(NULL, &priv->bo_gart); nouveau_bo_unmap(priv->bo); if (priv->bo) nouveau_bo_unpin(priv->bo); @@ -238,21 +220,6 @@ nv84_fence_create(struct nouveau_drm *drm) nouveau_bo_ref(NULL, &priv->bo); } - if (ret == 0) - ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0, - TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED, 0, - 0, NULL, NULL, &priv->bo_gart); - if (ret == 0) { - ret = nouveau_bo_pin(priv->bo_gart, TTM_PL_FLAG_TT, false); - if (ret == 0) { - ret = nouveau_bo_map(priv->bo_gart); - if (ret) - nouveau_bo_unpin(priv->bo_gart); - } - if (ret) - nouveau_bo_ref(NULL, &priv->bo_gart); - } - if (ret) nv84_fence_destroy(drm); return ret; diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index 90f27bfa381f..c0deef4fe727 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fbcon.h" +#include "nouveau_vmm.h" int nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) @@ -239,8 +240,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(fb->vma.offset)); - OUT_RING (chan, lower_32_bits(fb->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma->addr)); + OUT_RING (chan, lower_32_bits(fb->vma->addr)); BEGIN_NVC0(chan, NvSub2D, 0x0230, 10); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -250,8 +251,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(fb->vma.offset)); - OUT_RING (chan, lower_32_bits(fb->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma->addr)); + OUT_RING (chan, lower_32_bits(fb->vma->addr)); FIRE_RING (chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild index 067b5e9f5ec1..f1675a4ab6fa 100644 --- a/drivers/gpu/drm/nouveau/nvif/Kbuild +++ b/drivers/gpu/drm/nouveau/nvif/Kbuild @@ -2,4 +2,7 @@ nvif-y := nvif/object.o nvif-y += nvif/client.o nvif-y += nvif/device.o nvif-y += nvif/driver.o +nvif-y += nvif/mem.o +nvif-y += nvif/mmu.o nvif-y += nvif/notify.o +nvif-y += nvif/vmm.o diff --git a/drivers/gpu/drm/nouveau/nvif/mem.c b/drivers/gpu/drm/nouveau/nvif/mem.c new file mode 100644 index 000000000000..0f9382c60145 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/mem.c @@ -0,0 +1,88 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
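The nv84_fence changes above drop the separate VRAM/GART fence buffers and their per-type offsets: both the emit and sync paths now compute the target address from a single VMA, with a fixed 16-byte slot per channel (the emitting channel's own chid for emit, the peer channel's chid for sync). A trivial sketch of the slot arithmetic, with an illustrative base address standing in for fctx->vma->addr:

	#include <stdint.h>
	#include <assert.h>

	/* One 16-byte fence slot per channel within the shared VMA. */
	static uint64_t
	fence_slot(uint64_t vma_addr, int chid)
	{
		return vma_addr + (uint64_t)chid * 16;
	}

	int
	main(void)
	{
		uint64_t base = 0x100000;	/* stand-in for fctx->vma->addr */
		assert(fence_slot(base, 0) == 0x100000);
		assert(fence_slot(base, 3) == 0x100030);	/* chid 3 */
		return 0;
	}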
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <nvif/mem.h> +#include <nvif/client.h> + +#include <nvif/if000a.h> + +void +nvif_mem_fini(struct nvif_mem *mem) +{ + nvif_object_fini(&mem->object); +} + +int +nvif_mem_init_type(struct nvif_mmu *mmu, s32 oclass, int type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *mem) +{ + struct nvif_mem_v0 *args; + u8 stack[128]; + int ret; + + mem->object.client = NULL; + if (type < 0) + return -EINVAL; + + if (sizeof(*args) + argc > sizeof(stack)) { + if (!(args = kmalloc(sizeof(*args) + argc, GFP_KERNEL))) + return -ENOMEM; + } else { + args = (void *)stack; + } + args->version = 0; + args->type = type; + args->page = page; + args->size = size; + memcpy(args->data, argv, argc); + + ret = nvif_object_init(&mmu->object, 0, oclass, args, + sizeof(*args) + argc, &mem->object); + if (ret == 0) { + mem->type = mmu->type[type].type; + mem->page = args->page; + mem->addr = args->addr; + mem->size = args->size; + } + + if (args != (void *)stack) + kfree(args); + return ret; + +} + +int +nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *mem) +{ + int ret = -EINVAL, i; + + mem->object.client = NULL; + + for (i = 0; ret && i < mmu->type_nr; i++) { + if ((mmu->type[i].type & type) == type) { + ret = nvif_mem_init_type(mmu, oclass, i, page, size, + argv, argc, mem); + } + } + + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvif/mmu.c b/drivers/gpu/drm/nouveau/nvif/mmu.c new file mode 100644 index 000000000000..15d0dcbf7ab4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/mmu.c @@ -0,0 +1,117 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
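nvif_mem_init_type() above, and nvif_vmm_map() later in this diff, both marshal a fixed header plus a variable-length payload: a small on-stack buffer is used when the arguments fit, with a fall back to a heap allocation when they do not. A self-contained plain-C sketch of the pattern; struct hdr and send_with_payload() are illustrative names, not driver API, and the real code appends the payload through a flexible array member rather than past the struct.

	#include <stdlib.h>
	#include <string.h>

	struct hdr {
		unsigned char version;
		unsigned char type;
		/* ... fixed-size fields, variable payload follows ... */
	};

	static int
	send_with_payload(const void *payload, size_t argc)
	{
		unsigned char stack[128];	/* covers the common small case */
		struct hdr *args;
		int ret = 0;

		if (sizeof(*args) + argc > sizeof(stack)) {
			args = malloc(sizeof(*args) + argc);
			if (!args)
				return -1;	/* -ENOMEM in the kernel code */
		} else {
			args = (void *)stack;
		}

		args->version = 0;
		memcpy(args + 1, payload, argc);	/* payload after header */

		/* ... issue the method with (args, sizeof(*args) + argc) ... */

		if (args != (void *)stack)
			free(args);
		return ret;
	}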
+ */ +#include <nvif/mmu.h> + +#include <nvif/class.h> +#include <nvif/if0008.h> + +void +nvif_mmu_fini(struct nvif_mmu *mmu) +{ + kfree(mmu->kind); + kfree(mmu->type); + kfree(mmu->heap); + nvif_object_fini(&mmu->object); +} + +int +nvif_mmu_init(struct nvif_object *parent, s32 oclass, struct nvif_mmu *mmu) +{ + struct nvif_mmu_v0 args; + int ret, i; + + args.version = 0; + mmu->heap = NULL; + mmu->type = NULL; + mmu->kind = NULL; + + ret = nvif_object_init(parent, 0, oclass, &args, sizeof(args), + &mmu->object); + if (ret) + goto done; + + mmu->dmabits = args.dmabits; + mmu->heap_nr = args.heap_nr; + mmu->type_nr = args.type_nr; + mmu->kind_nr = args.kind_nr; + + mmu->heap = kmalloc(sizeof(*mmu->heap) * mmu->heap_nr, GFP_KERNEL); + mmu->type = kmalloc(sizeof(*mmu->type) * mmu->type_nr, GFP_KERNEL); + if (ret = -ENOMEM, !mmu->heap || !mmu->type) + goto done; + + mmu->kind = kmalloc(sizeof(*mmu->kind) * mmu->kind_nr, GFP_KERNEL); + if (!mmu->kind && mmu->kind_nr) + goto done; + + for (i = 0; i < mmu->heap_nr; i++) { + struct nvif_mmu_heap_v0 args = { .index = i }; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_HEAP, + &args, sizeof(args)); + if (ret) + goto done; + + mmu->heap[i].size = args.size; + } + + for (i = 0; i < mmu->type_nr; i++) { + struct nvif_mmu_type_v0 args = { .index = i }; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_TYPE, + &args, sizeof(args)); + if (ret) + goto done; + + mmu->type[i].type = 0; + if (args.vram) mmu->type[i].type |= NVIF_MEM_VRAM; + if (args.host) mmu->type[i].type |= NVIF_MEM_HOST; + if (args.comp) mmu->type[i].type |= NVIF_MEM_COMP; + if (args.disp) mmu->type[i].type |= NVIF_MEM_DISP; + if (args.kind ) mmu->type[i].type |= NVIF_MEM_KIND; + if (args.mappable) mmu->type[i].type |= NVIF_MEM_MAPPABLE; + if (args.coherent) mmu->type[i].type |= NVIF_MEM_COHERENT; + if (args.uncached) mmu->type[i].type |= NVIF_MEM_UNCACHED; + mmu->type[i].heap = args.heap; + } + + if (mmu->kind_nr) { + struct nvif_mmu_kind_v0 *kind; + u32 argc = sizeof(*kind) + sizeof(*kind->data) * mmu->kind_nr; + + if (ret = -ENOMEM, !(kind = kmalloc(argc, GFP_KERNEL))) + goto done; + kind->version = 0; + kind->count = mmu->kind_nr; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_KIND, + kind, argc); + if (ret == 0) + memcpy(mmu->kind, kind->data, kind->count); + kfree(kind); + } + +done: + if (ret) + nvif_mmu_fini(mmu); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvif/object.c b/drivers/gpu/drm/nouveau/nvif/object.c index c3fb6a20f567..40adfe9b334b 100644 --- a/drivers/gpu/drm/nouveau/nvif/object.c +++ b/drivers/gpu/drm/nouveau/nvif/object.c @@ -166,46 +166,77 @@ nvif_object_mthd(struct nvif_object *object, u32 mthd, void *data, u32 size) } void -nvif_object_unmap(struct nvif_object *object) +nvif_object_unmap_handle(struct nvif_object *object) +{ + struct { + struct nvif_ioctl_v0 ioctl; + struct nvif_ioctl_unmap unmap; + } args = { + .ioctl.type = NVIF_IOCTL_V0_UNMAP, + }; + + nvif_object_ioctl(object, &args, sizeof(args), NULL); +} + +int +nvif_object_map_handle(struct nvif_object *object, void *argv, u32 argc, + u64 *handle, u64 *length) { - if (object->map.size) { - struct nvif_client *client = object->client; - struct { - struct nvif_ioctl_v0 ioctl; - struct nvif_ioctl_unmap unmap; - } args = { - .ioctl.type = NVIF_IOCTL_V0_UNMAP, - }; + struct { + struct nvif_ioctl_v0 ioctl; + struct nvif_ioctl_map_v0 map; + } *args; + u32 argn = sizeof(*args) + argc; + int ret, maptype; + + if (!(args = kzalloc(argn, GFP_KERNEL))) + return -ENOMEM; + args->ioctl.type = 
NVIF_IOCTL_V0_MAP; + memcpy(args->map.data, argv, argc); - if (object->map.ptr) { + ret = nvif_object_ioctl(object, args, argn, NULL); + *handle = args->map.handle; + *length = args->map.length; + maptype = args->map.type; + kfree(args); + return ret ? ret : (maptype == NVIF_IOCTL_MAP_V0_IO); +} + +void +nvif_object_unmap(struct nvif_object *object) +{ + struct nvif_client *client = object->client; + if (object->map.ptr) { + if (object->map.size) { client->driver->unmap(client, object->map.ptr, object->map.size); - object->map.ptr = NULL; + object->map.size = 0; } - - nvif_object_ioctl(object, &args, sizeof(args), NULL); - object->map.size = 0; + object->map.ptr = NULL; + nvif_object_unmap_handle(object); } } int -nvif_object_map(struct nvif_object *object) +nvif_object_map(struct nvif_object *object, void *argv, u32 argc) { struct nvif_client *client = object->client; - struct { - struct nvif_ioctl_v0 ioctl; - struct nvif_ioctl_map_v0 map; - } args = { - .ioctl.type = NVIF_IOCTL_V0_MAP, - }; - int ret = nvif_object_ioctl(object, &args, sizeof(args), NULL); - if (ret == 0) { - object->map.size = args.map.length; - object->map.ptr = client->driver->map(client, args.map.handle, - object->map.size); - if (ret = -ENOMEM, object->map.ptr) + u64 handle, length; + int ret = nvif_object_map_handle(object, argv, argc, &handle, &length); + if (ret >= 0) { + if (ret) { + object->map.ptr = client->driver->map(client, + handle, + length); + if (ret = -ENOMEM, object->map.ptr) { + object->map.size = length; + return 0; + } + } else { + object->map.ptr = (void *)(unsigned long)handle; return 0; - nvif_object_unmap(object); + } + nvif_object_unmap_handle(object); } return ret; } diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c new file mode 100644 index 000000000000..31cdb2d2e1ff --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -0,0 +1,167 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
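A caller-side summary of the new nvif_object_map_handle() contract visible above: a negative return is an error; a positive return means the returned handle is an IO address that still needs the driver's map callback; zero means the handle already is a usable CPU virtual address (the NVIF_IOCTL_MAP_V0_IO / NVIF_IOCTL_MAP_V0_VA split). Sketch only: driver_map() below is a hypothetical stand-in for the real client->driver->map callback.

	/* Hypothetical stand-in for client->driver->map(). */
	static void *
	driver_map(unsigned long long handle, unsigned long long length)
	{
		(void)handle; (void)length;
		return 0;	/* would map the IO range here */
	}

	static void *
	object_map(int ret, unsigned long long handle, unsigned long long length)
	{
		if (ret < 0)
			return 0;				/* ioctl failed */
		if (ret > 0)
			return driver_map(handle, length);	/* MAP_V0_IO */
		return (void *)(unsigned long)handle;		/* MAP_V0_VA */
	}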
+ */ +#include <nvif/vmm.h> +#include <nvif/mem.h> + +#include <nvif/if000c.h> + +int +nvif_vmm_unmap(struct nvif_vmm *vmm, u64 addr) +{ + return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_UNMAP, + &(struct nvif_vmm_unmap_v0) { .addr = addr }, + sizeof(struct nvif_vmm_unmap_v0)); +} + +int +nvif_vmm_map(struct nvif_vmm *vmm, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_mem *mem, u64 offset) +{ + struct nvif_vmm_map_v0 *args; + u8 stack[16]; + int ret; + + if (sizeof(*args) + argc > sizeof(stack)) { + if (!(args = kmalloc(sizeof(*args) + argc, GFP_KERNEL))) + return -ENOMEM; + } else { + args = (void *)stack; + } + + args->version = 0; + args->addr = addr; + args->size = size; + args->memory = nvif_handle(&mem->object); + args->offset = offset; + memcpy(args->data, argv, argc); + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_MAP, + args, sizeof(*args) + argc); + if (args != (void *)stack) + kfree(args); + return ret; +} + +void +nvif_vmm_put(struct nvif_vmm *vmm, struct nvif_vma *vma) +{ + if (vma->size) { + WARN_ON(nvif_object_mthd(&vmm->object, NVIF_VMM_V0_PUT, + &(struct nvif_vmm_put_v0) { + .addr = vma->addr, + }, sizeof(struct nvif_vmm_put_v0))); + vma->size = 0; + } +} + +int +nvif_vmm_get(struct nvif_vmm *vmm, enum nvif_vmm_get type, bool sparse, + u8 page, u8 align, u64 size, struct nvif_vma *vma) +{ + struct nvif_vmm_get_v0 args; + int ret; + + args.version = vma->size = 0; + args.sparse = sparse; + args.page = page; + args.align = align; + args.size = size; + + switch (type) { + case ADDR: args.type = NVIF_VMM_GET_V0_ADDR; break; + case PTES: args.type = NVIF_VMM_GET_V0_PTES; break; + case LAZY: args.type = NVIF_VMM_GET_V0_LAZY; break; + default: + WARN_ON(1); + return -EINVAL; + } + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_GET, + &args, sizeof(args)); + if (ret == 0) { + vma->addr = args.addr; + vma->size = args.size; + } + return ret; +} + +void +nvif_vmm_fini(struct nvif_vmm *vmm) +{ + kfree(vmm->page); + nvif_object_fini(&vmm->object); +} + +int +nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, + void *argv, u32 argc, struct nvif_vmm *vmm) +{ + struct nvif_vmm_v0 *args; + u32 argn = sizeof(*args) + argc; + int ret = -ENOSYS, i; + + vmm->object.client = NULL; + vmm->page = NULL; + + if (!(args = kmalloc(argn, GFP_KERNEL))) + return -ENOMEM; + args->version = 0; + args->addr = addr; + args->size = size; + memcpy(args->data, argv, argc); + + ret = nvif_object_init(&mmu->object, 0, oclass, args, argn, + &vmm->object); + if (ret) + goto done; + + vmm->start = args->addr; + vmm->limit = args->size; + + vmm->page_nr = args->page_nr; + vmm->page = kmalloc(sizeof(*vmm->page) * vmm->page_nr, GFP_KERNEL); + if (!vmm->page) { + ret = -ENOMEM; + goto done; + } + + for (i = 0; i < vmm->page_nr; i++) { + struct nvif_vmm_page_v0 args = { .index = i }; + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_PAGE, + &args, sizeof(args)); + if (ret) + break; + + vmm->page[i].shift = args.shift; + vmm->page[i].sparse = args.sparse; + vmm->page[i].vram = args.vram; + vmm->page[i].host = args.host; + vmm->page[i].comp = args.comp; + } + +done: + if (ret) + nvif_vmm_fini(vmm); + kfree(args); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c index 0d3a896892b4..ac671202919e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/client.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c @@ -301,5 +301,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg, client->debug = nvkm_dbgopt(dbg, 
"CLIENT"); client->objroot = RB_ROOT; client->ntfy = ntfy; + INIT_LIST_HEAD(&client->umem); + spin_lock_init(&client->lock); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c916954a10..657231c3c098 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -126,6 +126,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) return ret; } +static int +nvkm_engine_preinit(struct nvkm_subdev *subdev) +{ + struct nvkm_engine *engine = nvkm_engine(subdev); + if (engine->func->preinit) + engine->func->preinit(engine); + return 0; +} + static void * nvkm_engine_dtor(struct nvkm_subdev *subdev) { @@ -138,6 +147,7 @@ nvkm_engine_dtor(struct nvkm_subdev *subdev) static const struct nvkm_subdev_func nvkm_engine_func = { .dtor = nvkm_engine_dtor, + .preinit = nvkm_engine_preinit, .init = nvkm_engine_init, .fini = nvkm_engine_fini, .intr = nvkm_engine_intr, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c index a7bd22706b2a..d6de2b3ed2c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c @@ -42,6 +42,14 @@ nvkm_gpuobj_wr32_fast(struct nvkm_gpuobj *gpuobj, u32 offset, u32 data) } /* accessor functions for gpuobjs allocated directly from instmem */ +static int +nvkm_gpuobj_heap_map(struct nvkm_gpuobj *gpuobj, u64 offset, + struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc) +{ + return nvkm_memory_map(gpuobj->memory, offset, vmm, vma, argv, argc); +} + static u32 nvkm_gpuobj_heap_rd32(struct nvkm_gpuobj *gpuobj, u32 offset) { @@ -67,6 +75,7 @@ nvkm_gpuobj_heap_fast = { .release = nvkm_gpuobj_heap_release, .rd32 = nvkm_gpuobj_rd32_fast, .wr32 = nvkm_gpuobj_wr32_fast, + .map = nvkm_gpuobj_heap_map, }; static const struct nvkm_gpuobj_func @@ -74,6 +83,7 @@ nvkm_gpuobj_heap_slow = { .release = nvkm_gpuobj_heap_release, .rd32 = nvkm_gpuobj_heap_rd32, .wr32 = nvkm_gpuobj_heap_wr32, + .map = nvkm_gpuobj_heap_map, }; static void * @@ -90,9 +100,19 @@ nvkm_gpuobj_heap_acquire(struct nvkm_gpuobj *gpuobj) static const struct nvkm_gpuobj_func nvkm_gpuobj_heap = { .acquire = nvkm_gpuobj_heap_acquire, + .map = nvkm_gpuobj_heap_map, }; /* accessor functions for gpuobjs sub-allocated from a parent gpuobj */ +static int +nvkm_gpuobj_map(struct nvkm_gpuobj *gpuobj, u64 offset, + struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc) +{ + return nvkm_memory_map(gpuobj->parent, gpuobj->node->offset + offset, + vmm, vma, argv, argc); +} + static u32 nvkm_gpuobj_rd32(struct nvkm_gpuobj *gpuobj, u32 offset) { @@ -118,6 +138,7 @@ nvkm_gpuobj_fast = { .release = nvkm_gpuobj_release, .rd32 = nvkm_gpuobj_rd32_fast, .wr32 = nvkm_gpuobj_wr32_fast, + .map = nvkm_gpuobj_map, }; static const struct nvkm_gpuobj_func @@ -125,6 +146,7 @@ nvkm_gpuobj_slow = { .release = nvkm_gpuobj_release, .rd32 = nvkm_gpuobj_rd32, .wr32 = nvkm_gpuobj_wr32, + .map = nvkm_gpuobj_map, }; static void * @@ -143,6 +165,7 @@ nvkm_gpuobj_acquire(struct nvkm_gpuobj *gpuobj) static const struct nvkm_gpuobj_func nvkm_gpuobj_func = { .acquire = nvkm_gpuobj_acquire, + .map = nvkm_gpuobj_map, }; static int @@ -185,7 +208,7 @@ nvkm_gpuobj_ctor(struct nvkm_device *device, u32 size, int align, bool zero, gpuobj->size = nvkm_memory_size(gpuobj->memory); } - return nvkm_mm_init(&gpuobj->heap, 0, gpuobj->size, 1); + return nvkm_mm_init(&gpuobj->heap, 0, 0, gpuobj->size, 1); } void @@ -196,7 +219,7 @@ nvkm_gpuobj_del(struct 
nvkm_gpuobj **pgpuobj) if (gpuobj->parent) nvkm_mm_free(&gpuobj->parent->heap, &gpuobj->node); nvkm_mm_fini(&gpuobj->heap); - nvkm_memory_del(&gpuobj->memory); + nvkm_memory_unref(&gpuobj->memory); kfree(*pgpuobj); *pgpuobj = NULL; } @@ -218,26 +241,6 @@ nvkm_gpuobj_new(struct nvkm_device *device, u32 size, int align, bool zero, return ret; } -int -nvkm_gpuobj_map(struct nvkm_gpuobj *gpuobj, struct nvkm_vm *vm, - u32 access, struct nvkm_vma *vma) -{ - struct nvkm_memory *memory = gpuobj->memory; - int ret = nvkm_vm_get(vm, gpuobj->size, 12, access, vma); - if (ret == 0) - nvkm_memory_map(memory, vma, 0); - return ret; -} - -void -nvkm_gpuobj_unmap(struct nvkm_vma *vma) -{ - if (vma->node) { - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - } -} - /* the below is basically only here to support sharing the paged dma object * for PCI(E)GART on <=nv4x chipsets, and should *not* be expected to work * anywhere else. diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c index be19bbe56bba..d777df5a64e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c @@ -53,7 +53,7 @@ nvkm_ioctl_sclass(struct nvkm_client *client, union { struct nvif_ioctl_sclass_v0 v0; } *args = data; - struct nvkm_oclass oclass; + struct nvkm_oclass oclass = { .client = client }; int ret = -ENOSYS, i = 0; nvif_ioctl(object, "sclass size %d\n", size); @@ -257,13 +257,19 @@ nvkm_ioctl_map(struct nvkm_client *client, union { struct nvif_ioctl_map_v0 v0; } *args = data; + enum nvkm_object_map type; int ret = -ENOSYS; nvif_ioctl(object, "map size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { nvif_ioctl(object, "map vers %d\n", args->v0.version); - ret = nvkm_object_map(object, &args->v0.handle, - &args->v0.length); + ret = nvkm_object_map(object, data, size, &type, + &args->v0.handle, + &args->v0.length); + if (type == NVKM_OBJECT_MAP_IO) + args->v0.type = NVIF_IOCTL_MAP_V0_IO; + else + args->v0.type = NVIF_IOCTL_MAP_V0_VA; } return ret; @@ -281,6 +287,7 @@ nvkm_ioctl_unmap(struct nvkm_client *client, nvif_ioctl(object, "unmap size %d\n", size); if (!(ret = nvif_unvers(ret, &data, &size, args->none))) { nvif_ioctl(object, "unmap\n"); + ret = nvkm_object_unmap(object); } return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/memory.c b/drivers/gpu/drm/nouveau/nvkm/core/memory.c index 8903c04c977e..e85a08ecd9da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/memory.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/memory.c @@ -22,27 +22,117 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include <core/memory.h> +#include <core/mm.h> +#include <subdev/fb.h> #include <subdev/instmem.h> void +nvkm_memory_tags_put(struct nvkm_memory *memory, struct nvkm_device *device, + struct nvkm_tags **ptags) +{ + struct nvkm_fb *fb = device->fb; + struct nvkm_tags *tags = *ptags; + if (tags) { + mutex_lock(&fb->subdev.mutex); + if (refcount_dec_and_test(&tags->refcount)) { + nvkm_mm_free(&fb->tags, &tags->mn); + kfree(memory->tags); + memory->tags = NULL; + } + mutex_unlock(&fb->subdev.mutex); + *ptags = NULL; + } +} + +int +nvkm_memory_tags_get(struct nvkm_memory *memory, struct nvkm_device *device, + u32 nr, void (*clr)(struct nvkm_device *, u32, u32), + struct nvkm_tags **ptags) +{ + struct nvkm_fb *fb = device->fb; + struct nvkm_tags *tags; + + mutex_lock(&fb->subdev.mutex); + if ((tags = memory->tags)) { + /* If comptags exist for the memory, but a 
different amount + * than requested, the buffer is being mapped with settings + * that are incompatible with existing mappings. + */ + if (tags->mn && tags->mn->length != nr) { + mutex_unlock(&fb->subdev.mutex); + return -EINVAL; + } + + refcount_inc(&tags->refcount); + mutex_unlock(&fb->subdev.mutex); + *ptags = tags; + return 0; + } + + if (!(tags = kmalloc(sizeof(*tags), GFP_KERNEL))) { + mutex_unlock(&fb->subdev.mutex); + return -ENOMEM; + } + + if (!nvkm_mm_head(&fb->tags, 0, 1, nr, nr, 1, &tags->mn)) { + if (clr) + clr(device, tags->mn->offset, tags->mn->length); + } else { + /* Failure to allocate HW comptags is not an error, the + * caller should fall back to an uncompressed map. + * + * As memory can be mapped in multiple places, we still + * need to track the allocation failure and ensure that + * any additional mappings remain uncompressed. + * + * This is handled by returning an empty nvkm_tags. + */ + tags->mn = NULL; + } + + refcount_set(&tags->refcount, 1); + mutex_unlock(&fb->subdev.mutex); + *ptags = tags; + return 0; +} + +void nvkm_memory_ctor(const struct nvkm_memory_func *func, struct nvkm_memory *memory) { memory->func = func; + kref_init(&memory->kref); +} + +static void +nvkm_memory_del(struct kref *kref) +{ + struct nvkm_memory *memory = container_of(kref, typeof(*memory), kref); + if (!WARN_ON(!memory->func)) { + if (memory->func->dtor) + memory = memory->func->dtor(memory); + kfree(memory); + } } void -nvkm_memory_del(struct nvkm_memory **pmemory) +nvkm_memory_unref(struct nvkm_memory **pmemory) { struct nvkm_memory *memory = *pmemory; - if (memory && !WARN_ON(!memory->func)) { - if (memory->func->dtor) - *pmemory = memory->func->dtor(memory); - kfree(*pmemory); + if (memory) { + kref_put(&memory->kref, nvkm_memory_del); *pmemory = NULL; } } +struct nvkm_memory * +nvkm_memory_ref(struct nvkm_memory *memory) +{ + if (memory) + kref_get(&memory->kref); + return memory; +} + int nvkm_memory_new(struct nvkm_device *device, enum nvkm_memory_target target, u64 size, u32 align, bool zero, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c index 5c7891234eea..f78a06a6b2f1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c @@ -237,7 +237,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min, } int -nvkm_mm_init(struct nvkm_mm *mm, u32 offset, u32 length, u32 block) +nvkm_mm_init(struct nvkm_mm *mm, u8 heap, u32 offset, u32 length, u32 block) { struct nvkm_mm_node *node, *prev; u32 next; @@ -274,7 +274,8 @@ nvkm_mm_init(struct nvkm_mm *mm, u32 offset, u32 length, u32 block) list_add_tail(&node->nl_entry, &mm->nodes); list_add_tail(&node->fl_entry, &mm->free); - node->heap = ++mm->heap_nodes; + node->heap = heap; + mm->heap_nodes++; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c index acd76fd4f6d8..301a5e5b5f7f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/object.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c @@ -102,10 +102,19 @@ nvkm_object_ntfy(struct nvkm_object *object, u32 mthd, } int -nvkm_object_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_object_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { if (likely(object->func->map)) - return object->func->map(object, addr, size); + return object->func->map(object, argv, argc, type, addr, size); + return -ENODEV; +} + +int +nvkm_object_unmap(struct nvkm_object 
*object) +{ + if (likely(object->func->unmap)) + return object->func->unmap(object); return -ENODEV; } @@ -259,6 +268,7 @@ nvkm_object_dtor(struct nvkm_object *object) } nvif_debug(object, "destroy running...\n"); + nvkm_object_unmap(object); if (object->func->dtor) data = object->func->dtor(object); nvkm_engine_unref(&object->engine); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c index e31a0479add0..16299837a296 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c @@ -37,9 +37,17 @@ nvkm_oproxy_ntfy(struct nvkm_object *object, u32 mthd, } static int -nvkm_oproxy_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_oproxy_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { - return nvkm_object_map(nvkm_oproxy(object)->object, addr, size); + struct nvkm_oproxy *oproxy = nvkm_oproxy(object); + return nvkm_object_map(oproxy->object, argv, argc, type, addr, size); +} + +static int +nvkm_oproxy_unmap(struct nvkm_object *object) +{ + return nvkm_object_unmap(nvkm_oproxy(object)->object); } static int @@ -171,6 +179,7 @@ nvkm_oproxy_func = { .mthd = nvkm_oproxy_mthd, .ntfy = nvkm_oproxy_ntfy, .map = nvkm_oproxy_map, + .unmap = nvkm_oproxy_unmap, .rd08 = nvkm_oproxy_rd08, .rd16 = nvkm_oproxy_rd16, .rd32 = nvkm_oproxy_rd32, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c index 89da47234016..ccba4ae73cc5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c @@ -21,6 +21,7 @@ */ #include <core/ramht.h> #include <core/engine.h> +#include <core/object.h> static u32 nvkm_ramht_hash(struct nvkm_ramht *ramht, int chid, u32 handle) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c index 8e2e24a74774..44e116f7880d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c @@ -39,5 +39,5 @@ int g84_bsp_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { return nvkm_xtensa_new_(&g84_bsp, device, index, - true, 0x103000, pengine); + device->chipset != 0x92, 0x103000, pengine); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e096a5d9c292..e14643615698 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -927,7 +927,7 @@ nv84_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g84_pci_new, .therm = g84_therm_new, @@ -959,7 +959,7 @@ nv86_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g84_pci_new, .therm = g84_therm_new, @@ -991,7 +991,7 @@ nv92_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g92_pci_new, .therm = g84_therm_new, @@ -1023,7 +1023,7 @@ nv94_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1055,7 +1055,7 @@ nv96_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = 
g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1087,7 +1087,7 @@ nv98_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1119,7 +1119,7 @@ nva0_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1151,7 +1151,7 @@ nva3_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1185,7 +1185,7 @@ nva5_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1218,7 +1218,7 @@ nva8_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1251,7 +1251,7 @@ nvaa_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1283,7 +1283,7 @@ nvac_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1315,7 +1315,7 @@ nvaf_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1678,7 +1678,7 @@ nve4_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1717,7 +1717,7 @@ nve6_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1756,7 +1756,7 @@ nve7_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1790,7 +1790,7 @@ nvea_chipset = { .imem = gk20a_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk20a_mmu_new, .pmu = gk20a_pmu_new, .timer = gk20a_timer_new, .top = gk104_top_new, @@ -1820,7 +1820,7 @@ nvf0_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk110_pmu_new, @@ -1858,7 +1858,7 @@ nvf1_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk110_pmu_new, @@ -1896,7 +1896,7 @@ nv106_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk208_pmu_new, @@ -1934,7 +1934,7 @@ nv108_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk208_pmu_new, @@ 
-1958,7 +1958,7 @@ nv108_chipset = { static const struct nvkm_device_chip nv117_chipset = { .name = "GM107", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .clk = gk104_clk_new, @@ -1972,7 +1972,7 @@ nv117_chipset = { .imem = nv50_instmem_new, .ltc = gm107_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -1992,7 +1992,7 @@ nv117_chipset = { static const struct nvkm_device_chip nv118_chipset = { .name = "GM108", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .clk = gk104_clk_new, @@ -2006,7 +2006,7 @@ nv118_chipset = { .imem = nv50_instmem_new, .ltc = gm107_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2026,7 +2026,7 @@ nv118_chipset = { static const struct nvkm_device_chip nv120_chipset = { .name = "GM200", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2039,7 +2039,7 @@ nv120_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2061,7 +2061,7 @@ nv120_chipset = { static const struct nvkm_device_chip nv124_chipset = { .name = "GM204", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2074,7 +2074,7 @@ nv124_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2096,7 +2096,7 @@ nv124_chipset = { static const struct nvkm_device_chip nv126_chipset = { .name = "GM206", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2109,7 +2109,7 @@ nv126_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2131,7 +2131,7 @@ nv126_chipset = { static const struct nvkm_device_chip nv12b_chipset = { .name = "GM20B", - .bar = gk20a_bar_new, + .bar = gm20b_bar_new, .bus = gf100_bus_new, .clk = gm20b_clk_new, .fb = gm20b_fb_new, @@ -2140,7 +2140,7 @@ nv12b_chipset = { .imem = gk20a_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm20b_mmu_new, .pmu = gm20b_pmu_new, .secboot = gm20b_secboot_new, .timer = gk20a_timer_new, @@ -2156,7 +2156,7 @@ nv12b_chipset = { static const struct nvkm_device_chip nv130_chipset = { .name = "GP100", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2168,7 +2168,8 @@ nv130_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gm200_secboot_new, .pci = gp100_pci_new, .pmu = gp100_pmu_new, @@ -2190,7 +2191,7 @@ nv130_chipset = { static const struct nvkm_device_chip nv132_chipset = { .name = "GP102", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2202,7 +2203,8 @@ nv132_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = 
gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2224,7 +2226,7 @@ nv132_chipset = { static const struct nvkm_device_chip nv134_chipset = { .name = "GP104", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2236,7 +2238,8 @@ nv134_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2258,7 +2261,7 @@ nv134_chipset = { static const struct nvkm_device_chip nv136_chipset = { .name = "GP106", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2270,7 +2273,8 @@ nv136_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2292,7 +2296,7 @@ nv136_chipset = { static const struct nvkm_device_chip nv137_chipset = { .name = "GP107", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2304,7 +2308,8 @@ nv137_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2326,7 +2331,7 @@ nv137_chipset = { static const struct nvkm_device_chip nv138_chipset = { .name = "GP108", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2338,7 +2343,8 @@ nv138_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .timer = gk20a_timer_new, @@ -2355,7 +2361,7 @@ nv138_chipset = { static const struct nvkm_device_chip nv13b_chipset = { .name = "GP10B", - .bar = gk20a_bar_new, + .bar = gm20b_bar_new, .bus = gf100_bus_new, .fb = gp10b_fb_new, .fuse = gm107_fuse_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h index 20249d8e444d..2c3c3ee3c494 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h @@ -1,7 +1,7 @@ #ifndef __NVKM_DEVICE_CTRL_H__ #define __NVKM_DEVICE_CTRL_H__ #define nvkm_control(p) container_of((p), struct nvkm_control, object) -#include <core/device.h> +#include <core/object.h> struct nvkm_control { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 74a1ffa425f7..f302d2b5782a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1627,7 +1627,7 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, const struct nvkm_device_pci_vendor *pciv; const char *name = NULL; struct nvkm_device_pci *pdev; - int ret; + int ret, bits; ret = pci_enable_device(pci_dev); if (ret) @@ -1679,17 +1679,17 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, if (ret) return ret; - /* - * Set a 
preliminary DMA mask based on the .dma_bits member of the - * MMU subdevice. This allows other subdevices to create DMA mappings - * in their init() or oneinit() methods, which may be called before the - * TTM layer sets the DMA mask definitively. - * This is necessary for platforms where the default DMA mask of 32 - * does not cover any system memory, i.e., when all RAM is > 4 GB. - */ - if (pdev->device.mmu) - dma_set_mask_and_coherent(&pci_dev->dev, - DMA_BIT_MASK(pdev->device.mmu->dma_bits)); + /* Set DMA mask based on capabilities reported by the MMU subdev. */ + if (pdev->device.mmu && !pdev->device.pci->agp.bridge) + bits = pdev->device.mmu->dma_bits; + else + bits = 32; + + ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); + if (ret && bits != 32) { + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + pdev->device.mmu->dma_bits = 32; + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 189ed80e21ff..78597da6313a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -136,7 +136,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) if (ret) goto free_domain; - ret = nvkm_mm_init(&tdev->iommu.mm, 0, + ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, (1ULL << tdev->func->iommu_bit) >> tdev->iommu.pgshift, 1); if (ret) @@ -216,7 +216,7 @@ nvkm_device_tegra_fini(struct nvkm_device *device, bool suspend) if (tdev->irq) { free_irq(tdev->irq, tdev); tdev->irq = 0; - }; + } } static int @@ -309,8 +309,6 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, /** * The IOMMU bit defines the upper limit of the GPU-addressable space. - * This will be refined in nouveau_ttm_init but we need to do it early - * for instmem to behave properly */ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit)); if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 513ee6b79553..17adcb4e8854 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -206,10 +206,12 @@ nvkm_udevice_wr32(struct nvkm_object *object, u64 addr, u32 data) } static int -nvkm_udevice_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_udevice_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_udevice *udev = nvkm_udevice(object); struct nvkm_device *device = udev->device; + *type = NVKM_OBJECT_MAP_IO; *addr = device->func->resource_addr(device, 0); *size = device->func->resource_size(device, 0); return 0; @@ -292,6 +294,11 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index, if (!sclass) { switch (index) { case 0: sclass = &nvkm_control_oclass; break; + case 1: + if (!device->mmu) + return -EINVAL; + sclass = &device->mmu->user; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afd..723dcbde2ac2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -191,11 +191,13 @@ nv50_disp_chan_ntfy(struct nvkm_object *object, u32 type, } static int -nv50_disp_chan_map(struct nvkm_object *object, u64 *addr, u32 *size) +nv50_disp_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 
*addr, u64 *size) { struct nv50_disp_chan *chan = nv50_disp_chan(object); struct nv50_disp *disp = chan->root->disp; struct nvkm_device *device = disp->base.engine.subdev.device; + *type = NVKM_OBJECT_MAP_IO; *addr = device->func->resource_addr(device, 0) + 0x640000 + (chan->chid.user * 0x1000); *size = 0x001000; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index 737b38f6fbd2..9bb4ad5b0e57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -1,6 +1,7 @@ #ifndef __NV50_DISP_CHAN_H__ #define __NV50_DISP_CHAN_H__ #define nv50_disp_chan(p) container_of((p), struct nv50_disp_chan, object) +#include <core/object.h> #include "nv50.h" struct nv50_disp_chan { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index a1e8bf48b778..c9e0a8f7b5d5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -147,7 +147,7 @@ void gf119_hda_eld(struct nvkm_ior *, u8 *, u8); #define IOR_MSG(i,l,f,a...) do { \ struct nvkm_ior *_ior = (i); \ - nvkm_##l(&_ior->disp->engine.subdev, "%s: "f, _ior->name, ##a); \ + nvkm_##l(&_ior->disp->engine.subdev, "%s: "f"\n", _ior->name, ##a); \ } while(0) #define IOR_WARN(i,f,a...) IOR_MSG((i), warn, f, ##a) #define IOR_DBG(i,f,a...) IOR_MSG((i), debug, f, ##a) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c index c95942ef8216..49ef7e57aad4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c @@ -26,7 +26,7 @@ #include <core/gpuobj.h> #include <subdev/fb.h> -#include <subdev/mmu/nv04.h> +#include <subdev/mmu/vmm.h> #include <nvif/class.h> @@ -49,8 +49,8 @@ nv04_dmaobj_bind(struct nvkm_dmaobj *base, struct nvkm_gpuobj *parent, int ret; if (dmaobj->clone) { - struct nv04_mmu *mmu = nv04_mmu(device->mmu); - struct nvkm_memory *pgt = mmu->vm->pgt[0].mem[0]; + struct nvkm_memory *pgt = + device->mmu->vmm->pd->pt[0]->memory; if (!dmaobj->base.start) return nvkm_gpuobj_wrap(pgt, pgpuobj); nvkm_kmap(pgt); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c index 2e7b4e2105ef..816ccaedfc73 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c @@ -99,7 +99,7 @@ nvkm_falcon_fini(struct nvkm_engine *engine, bool suspend) const u32 base = falcon->addr; if (!suspend) { - nvkm_memory_del(&falcon->core); + nvkm_memory_unref(&falcon->core); if (falcon->external) { vfree(falcon->data.data); vfree(falcon->code.data); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index 660ca7aa95ea..64f6b7654a08 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -27,6 +27,7 @@ #include <core/client.h> #include <core/gpuobj.h> #include <core/notify.h> +#include <subdev/mc.h> #include <nvif/event.h> #include <nvif/unpack.h> @@ -278,6 +279,12 @@ nvkm_fifo_oneinit(struct nvkm_engine *engine) return 0; } +static void +nvkm_fifo_preinit(struct nvkm_engine *engine) +{ + nvkm_mc_reset(engine->subdev.device, NVKM_ENGINE_FIFO); +} + static int nvkm_fifo_init(struct nvkm_engine *engine) { @@ -302,6 +309,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine) static const struct nvkm_engine_func nvkm_fifo 
= { .dtor = nvkm_fifo_dtor, + .preinit = nvkm_fifo_preinit, .oneinit = nvkm_fifo_oneinit, .init = nvkm_fifo_init, .fini = nvkm_fifo_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index fab760ae922f..d83485385934 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -117,8 +117,8 @@ nvkm_fifo_chan_child_del(struct nvkm_oproxy *base) if (chan->func->engine_dtor) chan->func->engine_dtor(chan, engine); nvkm_object_del(&engn->object); - if (chan->vm) - atomic_dec(&chan->vm->engref[engine->subdev.index]); + if (chan->vmm) + atomic_dec(&chan->vmm->engref[engine->subdev.index]); } } @@ -151,8 +151,8 @@ nvkm_fifo_chan_child_new(const struct nvkm_oclass *oclass, void *data, u32 size, .engine = oclass->engine, }; - if (chan->vm) - atomic_inc(&chan->vm->engref[engine->subdev.index]); + if (chan->vmm) + atomic_inc(&chan->vmm->engref[engine->subdev.index]); if (engine->func->fifo.cclass) { ret = engine->func->fifo.cclass(chan, &cclass, @@ -253,9 +253,11 @@ nvkm_fifo_chan_ntfy(struct nvkm_object *object, u32 type, } static int -nvkm_fifo_chan_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_fifo_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); + *type = NVKM_OBJECT_MAP_IO; *addr = chan->addr; *size = chan->size; return 0; @@ -325,7 +327,10 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object) if (chan->user) iounmap(chan->user); - nvkm_vm_ref(NULL, &chan->vm, NULL); + if (chan->vmm) { + nvkm_vmm_part(chan->vmm, chan->inst->memory); + nvkm_vmm_unref(&chan->vmm); + } nvkm_gpuobj_del(&chan->push); nvkm_gpuobj_del(&chan->inst); @@ -347,13 +352,12 @@ nvkm_fifo_chan_func = { int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func, struct nvkm_fifo *fifo, u32 size, u32 align, bool zero, - u64 vm, u64 push, u64 engines, int bar, u32 base, u32 user, - const struct nvkm_oclass *oclass, + u64 hvmm, u64 push, u64 engines, int bar, u32 base, + u32 user, const struct nvkm_oclass *oclass, struct nvkm_fifo_chan *chan) { struct nvkm_client *client = oclass->client; struct nvkm_device *device = fifo->engine.subdev.device; - struct nvkm_mmu *mmu = device->mmu; struct nvkm_dmaobj *dmaobj; unsigned long flags; int ret; @@ -382,16 +386,19 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func, } /* channel address space */ - if (!vm && mmu) { - if (!client->vm || client->vm->mmu == mmu) { - ret = nvkm_vm_ref(client->vm, &chan->vm, NULL); - if (ret) - return ret; - } else { + if (hvmm) { + struct nvkm_vmm *vmm = nvkm_uvmm_search(client, hvmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + + if (vmm->mmu != device->mmu) return -EINVAL; - } - } else { - return -ENOENT; + + ret = nvkm_vmm_join(vmm, chan->inst->memory); + if (ret) + return ret; + + chan->vmm = nvkm_vmm_ref(vmm); } /* allocate channel id */ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c index 61797c4dd07a..a5c998fe4485 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c @@ -229,15 +229,18 @@ g84_fifo_chan_func = { }; int -g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, +g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, const struct nvkm_oclass *oclass, struct nv50_fifo_chan *chan) { struct nvkm_device *device = 
fifo->base.engine.subdev.device; int ret; + if (!vmm) + return -EINVAL; + ret = nvkm_fifo_chan_ctor(&g84_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vm, push, + 0x10000, 0x1000, false, vmm, push, (1ULL << NVKM_ENGINE_BSP) | (1ULL << NVKM_ENGINE_CE0) | (1ULL << NVKM_ENGINE_CIPHER) | @@ -277,9 +280,5 @@ g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, if (ret) return ret; - ret = nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); - if (ret) - return ret; - - return nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); + return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h index 7d697e2dce1a..fc1142af02cf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h @@ -11,12 +11,9 @@ struct gf100_fifo_chan { struct list_head head; bool killed; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; - struct { struct nvkm_gpuobj *inst; - struct nvkm_vma vma; + struct nvkm_vma *vma; } engn[NVKM_SUBDEV_NR]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h index 230f64e5f731..5beb5c628473 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -12,12 +12,9 @@ struct gk104_fifo_chan { struct list_head head; bool killed; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; - struct { struct nvkm_gpuobj *inst; - struct nvkm_vma vma; + struct nvkm_vma *vma; } engn[NVKM_SUBDEV_NR]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c index 25b60aff40e4..85f7dbf53c99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c @@ -206,7 +206,6 @@ void * nv50_fifo_chan_dtor(struct nvkm_fifo_chan *base) { struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); nvkm_ramht_del(&chan->ramht); nvkm_gpuobj_del(&chan->pgd); nvkm_gpuobj_del(&chan->eng); @@ -229,15 +228,18 @@ nv50_fifo_chan_func = { }; int -nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, +nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, const struct nvkm_oclass *oclass, struct nv50_fifo_chan *chan) { struct nvkm_device *device = fifo->base.engine.subdev.device; int ret; + if (!vmm) + return -EINVAL; + ret = nvkm_fifo_chan_ctor(&nv50_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vm, push, + 0x10000, 0x1000, false, vmm, push, (1ULL << NVKM_ENGINE_DMAOBJ) | (1ULL << NVKM_ENGINE_SW) | (1ULL << NVKM_ENGINE_GR) | @@ -262,9 +264,5 @@ nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, if (ret) return ret; - ret = nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); - if (ret) - return ret; - - return nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); + return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h index 4b9da469b704..d853056e040b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h @@ -13,19 +13,18 @@ struct nv50_fifo_chan { struct nvkm_gpuobj *eng; struct nvkm_gpuobj *pgd; struct nvkm_ramht *ramht; - struct nvkm_vm *vm; struct nvkm_gpuobj 
*engn[NVKM_SUBDEV_NR]; }; -int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vm, u64 push, +int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, const struct nvkm_oclass *, struct nv50_fifo_chan *); void *nv50_fifo_chan_dtor(struct nvkm_fifo_chan *); void nv50_fifo_chan_fini(struct nvkm_fifo_chan *); void nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *, struct nvkm_engine *); void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int); -int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vm, u64 push, +int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, const struct nvkm_oclass *, struct nv50_fifo_chan *); extern const struct nvkm_fifo_chan_oclass nv50_fifo_dma_oclass; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c index caa914074752..fc34cddcd2f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c @@ -44,9 +44,9 @@ g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel dma size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vm %llx " + nvif_ioctl(parent, "create channel dma vers %d vmm %llx " "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.offset); if (!args->v0.pushbuf) return -EINVAL; @@ -57,7 +57,7 @@ g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = g84_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c index 0a7b6ed5ed28..c213122cf088 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c @@ -95,6 +95,7 @@ nv04_fifo_dma_fini(struct nvkm_fifo_chan *base) nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0); c = fifo->ramfc; + nvkm_kmap(fctx); do { u32 rm = ((1ULL << c->bits) - 1) << c->regs; u32 cm = ((1ULL << c->bits) - 1) << c->ctxs; @@ -102,6 +103,7 @@ nv04_fifo_dma_fini(struct nvkm_fifo_chan *base) u32 cv = (nvkm_ro32(fctx, c->ctxp + data) & ~cm); nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs)); } while ((++c)->bits); + nvkm_done(fctx); c = fifo->ramfc; do { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c index 480bc3777be5..8043718ad150 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c @@ -44,9 +44,9 @@ nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel dma size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vm %llx " + nvif_ioctl(parent, "create channel dma vers %d vmm %llx " "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.offset); if (!args->v0.pushbuf) return -EINVAL; @@ -57,7 +57,7 @@ nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = nv50_fifo_chan_ctor(fifo, 
args->v0.vm, args->v0.pushbuf, + ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c index cd468ab1db12..f69576868164 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c @@ -559,6 +559,7 @@ gf100_fifo_oneinit(struct nvkm_fifo *base) struct gf100_fifo *fifo = gf100_fifo(base); struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); int ret; /* Determine number of PBDMAs by checking valid enable bits. */ @@ -584,12 +585,12 @@ gf100_fifo_oneinit(struct nvkm_fifo *base) if (ret) return ret; - ret = nvkm_bar_umap(device->bar, 128 * 0x1000, 12, &fifo->user.bar); + ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), + &fifo->user.bar); if (ret) return ret; - nvkm_memory_map(fifo->user.mem, &fifo->user.bar, 0); - return 0; + return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); } static void @@ -628,7 +629,7 @@ gf100_fifo_init(struct nvkm_fifo *base) } nvkm_mask(device, 0x002200, 0x00000001, 0x00000001); - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar.offset >> 12); + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); @@ -639,10 +640,11 @@ static void * gf100_fifo_dtor(struct nvkm_fifo *base) { struct gf100_fifo *fifo = gf100_fifo(base); - nvkm_vm_put(&fifo->user.bar); - nvkm_memory_del(&fifo->user.mem); - nvkm_memory_del(&fifo->runlist.mem[0]); - nvkm_memory_del(&fifo->runlist.mem[1]); + struct nvkm_device *device = fifo->base.engine.subdev.device; + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); + nvkm_memory_unref(&fifo->user.mem); + nvkm_memory_unref(&fifo->runlist.mem[0]); + nvkm_memory_unref(&fifo->runlist.mem[1]); return fifo; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h index 70db58eab9c3..b81a2ad48aa4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h @@ -26,7 +26,7 @@ struct gf100_fifo { struct { struct nvkm_memory *mem; - struct nvkm_vma bar; + struct nvkm_vma *bar; } user; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index a7e55c422501..84bd703dd897 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -771,6 +771,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base) struct gk104_fifo *fifo = gk104_fifo(base); struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); int engn, runl, pbid, ret, i, j; enum nvkm_devidx engidx; u32 *map; @@ -834,13 +835,12 @@ gk104_fifo_oneinit(struct nvkm_fifo *base) if (ret) return ret; - ret = nvkm_bar_umap(device->bar, fifo->base.nr * 0x200, 12, - &fifo->user.bar); + ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), + &fifo->user.bar); if (ret) return ret; - nvkm_memory_map(fifo->user.mem, &fifo->user.bar, 0); - return 0; + return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); } static void @@ -866,7 +866,7 @@ gk104_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, 0x04014c + (i * 0x2000), 
0xffffffff); /* INTREN */ } - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar.offset >> 12); + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); @@ -876,14 +876,15 @@ static void * gk104_fifo_dtor(struct nvkm_fifo *base) { struct gk104_fifo *fifo = gk104_fifo(base); + struct nvkm_device *device = fifo->base.engine.subdev.device; int i; - nvkm_vm_put(&fifo->user.bar); - nvkm_memory_del(&fifo->user.mem); + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); + nvkm_memory_unref(&fifo->user.mem); for (i = 0; i < fifo->runlist_nr; i++) { - nvkm_memory_del(&fifo->runlist[i].mem[1]); - nvkm_memory_del(&fifo->runlist[i].mem[0]); + nvkm_memory_unref(&fifo->runlist[i].mem[1]); + nvkm_memory_unref(&fifo->runlist[i].mem[0]); } return fifo; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h index 44bff98d6725..466f1051f91a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h @@ -37,7 +37,7 @@ struct gk104_fifo { struct { struct nvkm_memory *mem; - struct nvkm_vma bar; + struct nvkm_vma *bar; } user; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c index 77c2f2a28bf3..2121f517b1dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c @@ -45,10 +45,10 @@ g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "pushbuf %llx ioffset %016llx " "ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.ioffset, args->v0.ilength); if (!args->v0.pushbuf) return -EINVAL; @@ -59,7 +59,7 @@ g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = g84_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c index f9e0377d3d24..75f9632789b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c @@ -111,7 +111,7 @@ gf100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, struct nvkm_gpuobj *inst = chan->base.inst; if (offset) { - u64 addr = chan->engn[engine->subdev.index].vma.offset; + u64 addr = chan->engn[engine->subdev.index].vma->addr; nvkm_kmap(inst); nvkm_wo32(inst, offset + 0x00, lower_32_bits(addr) | 4); nvkm_wo32(inst, offset + 0x04, upper_32_bits(addr)); @@ -126,7 +126,7 @@ gf100_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - nvkm_gpuobj_unmap(&chan->engn[engine->subdev.index].vma); + nvkm_vmm_put(chan->base.vmm, &chan->engn[engine->subdev.index].vma); nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } @@ -146,8 +146,13 @@ gf100_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, if (ret) return ret; - return 
nvkm_gpuobj_map(chan->engn[engn].inst, chan->vm, - NV_MEM_ACCESS_RW, &chan->engn[engn].vma); + ret = nvkm_vmm_get(chan->base.vmm, 12, chan->engn[engn].inst->size, + &chan->engn[engn].vma); + if (ret) + return ret; + + return nvkm_memory_map(chan->engn[engn].inst, 0, chan->base.vmm, + chan->engn[engn].vma, NULL, 0); } static void @@ -190,10 +195,7 @@ gf100_fifo_gpfifo_init(struct nvkm_fifo_chan *base) static void * gf100_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); - nvkm_gpuobj_del(&chan->pgd); - return chan; + return gf100_fifo_chan(base); } static const struct nvkm_fifo_chan_func @@ -216,7 +218,6 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, struct fermi_channel_gpfifo_v0 v0; } *args = data; struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; struct nvkm_object *parent = oclass->parent; struct gf100_fifo_chan *chan; u64 usermem, ioffset, ilength; @@ -224,10 +225,12 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "ioffset %016llx ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.ioffset, + args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength); + if (!args->v0.vmm) + return -EINVAL; } else return ret; @@ -239,7 +242,7 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, INIT_LIST_HEAD(&chan->head); ret = nvkm_fifo_chan_ctor(&gf100_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, args->v0.vm, 0, + 0x1000, 0x1000, true, args->v0.vmm, 0, (1ULL << NVKM_ENGINE_CE0) | (1ULL << NVKM_ENGINE_CE1) | (1ULL << NVKM_ENGINE_GR) | @@ -247,29 +250,13 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, (1ULL << NVKM_ENGINE_MSPPP) | (1ULL << NVKM_ENGINE_MSVLD) | (1ULL << NVKM_ENGINE_SW), - 1, fifo->user.bar.offset, 0x1000, + 1, fifo->user.bar->addr, 0x1000, oclass, &chan->base); if (ret) return ret; args->v0.chid = chan->base.chid; - /* page directory */ - ret = nvkm_gpuobj_new(device, 0x10000, 0x1000, false, NULL, &chan->pgd); - if (ret) - return ret; - - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x0200, lower_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0204, upper_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0208, 0xffffffff); - nvkm_wo32(chan->base.inst, 0x020c, 0x000000ff); - nvkm_done(chan->base.inst); - - ret = nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); - if (ret) - return ret; - /* clear channel control registers */ usermem = chan->base.chid * 0x1000; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 8abf6f8ef445..80c87521bebe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -117,7 +117,7 @@ gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, u32 offset = gk104_fifo_gpfifo_engine_addr(engine); if (offset) { - u64 addr = chan->engn[engine->subdev.index].vma.offset; + u64 addr = chan->engn[engine->subdev.index].vma->addr; u32 datalo = lower_32_bits(addr) | 0x00000004; u32 datahi = upper_32_bits(addr); nvkm_kmap(inst); @@ 
-138,7 +138,7 @@ gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - nvkm_gpuobj_unmap(&chan->engn[engine->subdev.index].vma); + nvkm_vmm_put(chan->base.vmm, &chan->engn[engine->subdev.index].vma); nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } @@ -158,8 +158,13 @@ gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, if (ret) return ret; - return nvkm_gpuobj_map(chan->engn[engn].inst, chan->vm, - NV_MEM_ACCESS_RW, &chan->engn[engn].vma); + ret = nvkm_vmm_get(chan->base.vmm, 12, chan->engn[engn].inst->size, + &chan->engn[engn].vma); + if (ret) + return ret; + + return nvkm_memory_map(chan->engn[engn].inst, 0, chan->base.vmm, + chan->engn[engn].vma, NULL, 0); } static void @@ -203,10 +208,7 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) static void * gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); - nvkm_gpuobj_del(&chan->pgd); - return chan; + return gk104_fifo_chan(base); } static const struct nvkm_fifo_chan_func @@ -229,17 +231,19 @@ struct gk104_fifo_chan_func { static int gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, struct gk104_fifo *fifo, u32 *engmask, u16 *chid, - u64 vm, u64 ioffset, u64 ilength, + u64 vmm, u64 ioffset, u64 ilength, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { - struct nvkm_device *device = fifo->base.engine.subdev.device; struct gk104_fifo_chan *chan; int runlist = -1, ret = -ENOSYS, i, j; u32 engines = 0, present = 0; u64 subdevs = 0; u64 usermem; + if (!vmm) + return -EINVAL; + /* Determine which downstream engines are present */ for (i = 0; i < fifo->engine_nr; i++) { struct nvkm_engine *engine = fifo->engine[i].engine; @@ -285,30 +289,14 @@ gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, INIT_LIST_HEAD(&chan->head); ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, vm, 0, subdevs, - 1, fifo->user.bar.offset, 0x200, + 0x1000, 0x1000, true, vmm, 0, subdevs, + 1, fifo->user.bar->addr, 0x200, oclass, &chan->base); if (ret) return ret; *chid = chan->base.chid; - /* Page directory. */ - ret = nvkm_gpuobj_new(device, 0x10000, 0x1000, false, NULL, &chan->pgd); - if (ret) - return ret; - - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x0200, lower_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0204, upper_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0208, 0xffffffff); - nvkm_wo32(chan->base.inst, 0x020c, 0x000000ff); - nvkm_done(chan->base.inst); - - ret = nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); - if (ret) - return ret; - /* Clear channel control registers. 
*/ usermem = chan->base.chid * 0x200; ilength = order_base_2(ilength / 8); @@ -373,18 +361,17 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "ioffset %016llx ilength %08x engine %08x\n", - args->v0.version, args->v0.vm, args->v0.ioffset, + args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength, args->v0.engines); return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo, &args->v0.engines, &args->v0.chid, - args->v0.vm, + args->v0.vmm, args->v0.ioffset, args->v0.ilength, oclass, pobject); - } return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c index c5a7de9db259..d8f28ec1e4a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c @@ -45,10 +45,10 @@ nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "pushbuf %llx ioffset %016llx " "ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.ioffset, args->v0.ilength); if (!args->v0.pushbuf) return -EINVAL; @@ -59,7 +59,7 @@ nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = nv50_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c index 66eb12c2b5ba..fa6e094d8068 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c @@ -100,8 +100,8 @@ void * nv50_fifo_dtor(struct nvkm_fifo *base) { struct nv50_fifo *fifo = nv50_fifo(base); - nvkm_memory_del(&fifo->runlist[1]); - nvkm_memory_del(&fifo->runlist[0]); + nvkm_memory_unref(&fifo->runlist[1]); + nvkm_memory_unref(&fifo->runlist[0]); return fifo; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index bc77eea351a5..881015080d83 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -986,14 +986,14 @@ gf100_grctx_pack_tpc[] = { ******************************************************************************/ int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, u32 access) +gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv) { if (info->data) { info->buffer[info->buffer_nr] = round_up(info->addr, align); info->addr = info->buffer[info->buffer_nr] + size; info->data->size = size; info->data->align = align; - info->data->access = access; + info->data->priv = priv; info->data++; return info->buffer_nr++; } @@ -1028,9 +1028,8 @@ void gf100_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = 
NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); @@ -1041,9 +1040,8 @@ void gf100_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -1057,9 +1055,8 @@ gf100_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); int gpc, tpc; u32 bo = 0; @@ -1267,85 +1264,87 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mc_unk260(device, 1); } +#define CB_RESERVED 0x80000 + int gf100_grctx_generate(struct gf100_gr *gr) { const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_memory *chan; + struct nvkm_memory *inst = NULL; + struct nvkm_memory *data = NULL; + struct nvkm_vmm *vmm = NULL; + struct nvkm_vma *ctx = NULL; struct gf100_grctx info; int ret, i; u64 addr; - /* allocate memory to for a "channel", which we'll use to generate - * the default context values + /* Allocate memory to for a "channel", which we'll use to generate + * the default context values. 
*/ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x80000 + gr->size, - 0x1000, true, &chan); - if (ret) { - nvkm_error(subdev, "failed to allocate chan memory, %d\n", ret); - return ret; - } + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + 0x1000, 0x1000, true, &inst); + if (ret) + goto done; - addr = nvkm_memory_addr(chan); + ret = nvkm_vmm_new(device, 0, 0, NULL, 0, NULL, "grctx", &vmm); + if (ret) + goto done; - /* PGD pointer */ - nvkm_kmap(chan); - nvkm_wo32(chan, 0x0200, lower_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0204, upper_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0208, 0xffffffff); - nvkm_wo32(chan, 0x020c, 0x000000ff); + vmm->debug = subdev->debug; - /* PGT[0] pointer */ - nvkm_wo32(chan, 0x1000, 0x00000000); - nvkm_wo32(chan, 0x1004, 0x00000001 | (addr + 0x2000) >> 8); + ret = nvkm_vmm_join(vmm, inst); + if (ret) + goto done; - /* identity-map the whole "channel" into its own vm */ - for (i = 0; i < nvkm_memory_size(chan) / 4096; i++) { - u64 addr = ((nvkm_memory_addr(chan) + (i * 4096)) >> 8) | 1; - nvkm_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); - nvkm_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); - } + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + CB_RESERVED + gr->size, 0, true, &data); + if (ret) + goto done; - /* context pointer (virt) */ - nvkm_wo32(chan, 0x0210, 0x00080004); - nvkm_wo32(chan, 0x0214, 0x00000000); - nvkm_done(chan); + ret = nvkm_vmm_get(vmm, 0, nvkm_memory_size(data), &ctx); + if (ret) + goto done; - nvkm_wr32(device, 0x100cb8, (addr + 0x1000) >> 8); - nvkm_wr32(device, 0x100cbc, 0x80000001); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00008000) - break; - ); + ret = nvkm_memory_map(data, 0, vmm, ctx, NULL, 0); + if (ret) + goto done; + + + /* Setup context pointer. */ + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4); + nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); + nvkm_done(inst); - /* setup default state for mmio list construction */ + /* Setup default state for mmio list construction. */ info.gr = gr; info.data = gr->mmio_data; info.mmio = gr->mmio_list; - info.addr = 0x2000 + (i * 8); + info.addr = ctx->addr; info.buffer_nr = 0; - /* make channel current */ + /* Make channel current. */ + addr = nvkm_memory_addr(inst) >> 12; if (gr->firmware) { nvkm_wr32(device, 0x409840, 0x00000030); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409500, 0x80000000 | addr); nvkm_wr32(device, 0x409504, 0x00000003); nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000010) break; ); - nvkm_kmap(chan); - nvkm_wo32(chan, 0x8001c, 1); - nvkm_wo32(chan, 0x80020, 0); - nvkm_wo32(chan, 0x80028, 0); - nvkm_wo32(chan, 0x8002c, 0); - nvkm_done(chan); + nvkm_kmap(data); + nvkm_wo32(data, 0x1c, 1); + nvkm_wo32(data, 0x20, 0); + nvkm_wo32(data, 0x28, 0); + nvkm_wo32(data, 0x2c, 0); + nvkm_done(data); } else { nvkm_wr32(device, 0x409840, 0x80000000); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409500, 0x80000000 | addr); nvkm_wr32(device, 0x409504, 0x00000001); nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x80000000) @@ -1355,8 +1354,8 @@ gf100_grctx_generate(struct gf100_gr *gr) grctx->main(gr, &info); - /* trigger a context unload by unsetting the "next channel valid" bit - * and faking a context switch interrupt + /* Trigger a context unload by unsetting the "next channel valid" bit + * and faking a context switch interrupt. 
*/ nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); nvkm_wr32(device, 0x409000, 0x00000100); @@ -1370,17 +1369,21 @@ gf100_grctx_generate(struct gf100_gr *gr) gr->data = kmalloc(gr->size, GFP_KERNEL); if (gr->data) { - nvkm_kmap(chan); + nvkm_kmap(data); for (i = 0; i < gr->size; i += 4) - gr->data[i / 4] = nvkm_ro32(chan, 0x80000 + i); - nvkm_done(chan); + gr->data[i / 4] = nvkm_ro32(data, CB_RESERVED + i); + nvkm_done(data); ret = 0; } else { ret = -ENOMEM; } done: - nvkm_memory_del(&chan); + nvkm_vmm_put(vmm, &ctx); + nvkm_memory_unref(&data); + nvkm_vmm_part(vmm, inst); + nvkm_vmm_unref(&vmm); + nvkm_memory_unref(&inst); return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 017180d147cf..4731e56fbb11 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -11,7 +11,7 @@ struct gf100_grctx { u64 addr; }; -int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, u32 access); +int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv); void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int); #define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d)) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index 505cdcbfc085..82f71b10c06e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -735,9 +735,8 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 74a64e3fd59a..19301d88577d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -187,9 +187,8 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index c46b3fdf7203..825c8fd500bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -847,9 +847,8 @@ gk104_grctx_generate_bundle(struct gf100_grctx *info) const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, grctx->bundle_size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); 
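The grctx hunks on either side of this point all make the same substitution: the NV_MEM_ACCESS_* mask that callers threaded through mmio_vram() collapses into a single privileged/non-privileged flag (gf100_gr_data.access becomes bool priv), which gf100.c later forwards to the VMM via gf100_vmm_map_v0. A condensed before/after sketch, using only the signatures visible in this diff; b_old/b_new are illustrative names, not code from the patch:

	/* Before: mapping rights were an NV_MEM_ACCESS_* bitmask, stored in
	 * gf100_gr_data.access and decoded when the buffer was mapped. */
	const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS;
	const int b_old = mmio_vram(info, grctx->bundle_size, (1 << s), access);

	/* After: only "privileged or not" survives. Bundle and pagepool
	 * buffers pass true; per-TPC attribute buffers pass false. */
	const int b_new = mmio_vram(info, grctx->bundle_size, (1 << s), true);
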
mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); @@ -861,9 +860,8 @@ void gk104_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 4c4b5ab6e46d..9b43d4ce3eaa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -867,9 +867,8 @@ gm107_grctx_generate_bundle(struct gf100_grctx *info) const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, grctx->bundle_size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418e24, 0x00000000, s, b); @@ -881,9 +880,8 @@ void gm107_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -900,9 +898,8 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 7833bc777a29..88ea322d956c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -33,9 +33,8 @@ void gp100_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -51,9 +50,8 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) const u32 attrib = grctx->attrib_nr; const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 size = roundup(gr->tpc_total * pertpc, 0x80); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, 
size, (1 << s), access); + const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; u32 ao = 0; u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 80b7ab0bee3a..7a66b4c2eb18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -38,9 +38,8 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const u32 attrib = grctx->attrib_nr; const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 size = roundup(gr->tpc_total * pertpc, 0x80); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size, (1 << s), access); + const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; u32 ao = 0; u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 99689f4de502..2f8dc107047d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -37,6 +37,7 @@ #include <nvif/class.h> #include <nvif/cl9097.h> +#include <nvif/if900d.h> #include <nvif/unpack.h> /******************************************************************************* @@ -327,13 +328,13 @@ gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, if (!gr->firmware) { nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2); - nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8); + nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma->addr >> 8); } else { nvkm_wo32(*pgpuobj, 0xf4, 0); nvkm_wo32(*pgpuobj, 0xf8, 0); nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2); - nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset)); - nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset)); + nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma->addr)); + nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma->addr)); nvkm_wo32(*pgpuobj, 0x1c, 1); nvkm_wo32(*pgpuobj, 0x20, 0); nvkm_wo32(*pgpuobj, 0x28, 0); @@ -350,18 +351,13 @@ gf100_gr_chan_dtor(struct nvkm_object *object) int i; for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - if (chan->data[i].vma.node) { - nvkm_vm_unmap(&chan->data[i].vma); - nvkm_vm_put(&chan->data[i].vma); - } - nvkm_memory_del(&chan->data[i].mem); + nvkm_vmm_put(chan->vmm, &chan->data[i].vma); + nvkm_memory_unref(&chan->data[i].mem); } - if (chan->mmio_vma.node) { - nvkm_vm_unmap(&chan->mmio_vma); - nvkm_vm_put(&chan->mmio_vma); - } - nvkm_memory_del(&chan->mmio); + nvkm_vmm_put(chan->vmm, &chan->mmio_vma); + nvkm_memory_unref(&chan->mmio); + nvkm_vmm_unref(&chan->vmm); return chan; } @@ -380,6 +376,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, struct gf100_gr_data *data = gr->mmio_data; struct gf100_gr_mmio *mmio = gr->mmio_list; struct gf100_gr_chan *chan; + struct gf100_vmm_map_v0 args = { .priv = 1 }; struct nvkm_device *device = gr->base.engine.subdev.device; int ret, i; @@ -387,6 +384,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object); chan->gr = gr; + chan->vmm = nvkm_vmm_ref(fifoch->vmm); *pobject = &chan->object; /* allocate memory for a "mmio list" buffer that's used by the HUB @@ -398,12 +396,14 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; - ret = nvkm_vm_get(fifoch->vm, 0x1000, 
12, NV_MEM_ACCESS_RW | - NV_MEM_ACCESS_SYS, &chan->mmio_vma); + ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); if (ret) return ret; - nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0); + ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, + chan->mmio_vma, &args, sizeof(args)); + if (ret) + return ret; /* allocate buffers referenced by mmio list */ for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { @@ -413,13 +413,19 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; - ret = nvkm_vm_get(fifoch->vm, - nvkm_memory_size(chan->data[i].mem), 12, - data->access, &chan->data[i].vma); + ret = nvkm_vmm_get(fifoch->vmm, 12, + nvkm_memory_size(chan->data[i].mem), + &chan->data[i].vma); + if (ret) + return ret; + + args.priv = data->priv; + + ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm, + chan->data[i].vma, &args, sizeof(args)); if (ret) return ret; - nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0); data++; } @@ -430,7 +436,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, u32 data = mmio->data; if (mmio->buffer >= 0) { - u64 info = chan->data[mmio->buffer].vma.offset; + u64 info = chan->data[mmio->buffer].vma->addr; data |= info >> mmio->shift; } @@ -1855,8 +1861,12 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, int ret; ret = nvkm_firmware_get(device, fwname, &fw); - if (ret) - return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); + if (ret) { + ret = gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); + if (ret) + return -ENODEV; + return 0; + } fuc->size = fw->size; fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); @@ -1903,25 +1913,33 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, return 0; } +void +gf100_gr_init_gpc_mmu(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fb *fb = device->fb; + + nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001); + nvkm_wr32(device, 0x4188a4, 0x00000000); + nvkm_wr32(device, 0x418888, 0x00000000); + nvkm_wr32(device, 0x41888c, 0x00000000); + nvkm_wr32(device, 0x418890, 0x00000000); + nvkm_wr32(device, 0x418894, 0x00000000); + nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(fb->mmu_wr) >> 8); + nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8); +} + int gf100_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - struct nvkm_fb *fb = device->fb; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, rop; int i; - nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); - nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); + gr->func->init_gpc_mmu(gr); gf100_gr_mmio(gr, gr->func->mmio); @@ -2036,6 +2054,7 @@ gf100_gr_gpccs_ucode = { static const struct gf100_gr_func gf100_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index a36e45a4a635..d7c2adb9b543 
100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -45,7 +45,7 @@ struct gf100_gr_data { u32 size; u32 align; - u32 access; + bool priv; }; struct gf100_gr_mmio { @@ -156,18 +156,20 @@ int gp100_gr_init(struct gf100_gr *); void gp100_gr_init_rop_active_fbps(struct gf100_gr *); #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) +#include <core/object.h> struct gf100_gr_chan { struct nvkm_object object; struct gf100_gr *gr; + struct nvkm_vmm *vmm; struct nvkm_memory *mmio; - struct nvkm_vma mmio_vma; + struct nvkm_vma *mmio_vma; int mmio_nr; struct { struct nvkm_memory *mem; - struct nvkm_vma vma; + struct nvkm_vma *vma; } data[4]; }; @@ -253,6 +255,7 @@ extern const struct gf100_gr_init gf100_gr_init_mpc_0[]; extern const struct gf100_gr_init gf100_gr_init_be_0[]; extern const struct gf100_gr_init gf100_gr_init_fe_1[]; extern const struct gf100_gr_init gf100_gr_init_pe_1[]; +void gf100_gr_init_gpc_mmu(struct gf100_gr *); extern const struct gf100_gr_init gf104_gr_init_ds_0[]; extern const struct gf100_gr_init gf104_gr_init_tex_0[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index d736dcd55ea2..ec0f11983b23 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -115,6 +115,7 @@ gf104_gr_pack_mmio[] = { static const struct gf100_gr_func gf104_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index 2f0d24498427..cc152eb74123 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -106,6 +106,7 @@ gf108_gr_pack_mmio[] = { static const struct gf100_gr_func gf108_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index d1d942eb86af..10d2d73ca8c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -87,6 +87,7 @@ gf110_gr_pack_mmio[] = { static const struct gf100_gr_func gf110_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index 0124e468086e..ac09a07c4150 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -123,6 +123,7 @@ gf117_gr_gpccs_ucode = { static const struct gf100_gr_func gf117_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, .gpccs.ucode = &gf117_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 8d8e4cafe28f..7f449ec6f760 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -178,6 +178,7 @@ gf119_gr_pack_mmio[] = { static const struct gf100_gr_func gf119_gr = { .init = gf100_gr_init, 
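The .init_gpc_mmu addition that follows, like its twins in the surrounding chipset tables, installs the same default hook. What it plugs in is the factoring from the gf100.c hunk above: the eight open-coded GPC_BCAST(0x0880..0x08b8) writes in gf100_gr_init() and gk104_gr_init() move behind a per-chipset function pointer. An abbreviated sketch of that shape (the real gf100_gr_func and init paths carry many more members and steps):

	/* New hook, per the gf100.c hunk: programs the GPC-side MMU mirror
	 * registers, including the fb->mmu_wr/mmu_rd scratch addresses. */
	void gf100_gr_init_gpc_mmu(struct gf100_gr *gr);

	int
	gf100_gr_init(struct gf100_gr *gr)	/* abbreviated sketch */
	{
		gr->func->init_gpc_mmu(gr);	/* was: 8x nvkm_wr32(GPC_BCAST(...)) */
		/* ... remainder of init unchanged ... */
		return 0;
	}
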
+ .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index ec22da6c99fc..5e82f94c2245 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -24,8 +24,6 @@ #include "gf100.h" #include "ctxgf100.h" -#include <subdev/fb.h> - #include <nvif/class.h> /******************************************************************************* @@ -207,21 +205,13 @@ int gk104_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - struct nvkm_fb *fb = device->fb; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, rop; int i; - nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); - nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); + gr->func->init_gpc_mmu(gr); gf100_gr_mmio(gr, gr->func->mmio); @@ -339,6 +329,7 @@ gk104_gr_gpccs_ucode = { static const struct gf100_gr_func gk104_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk104_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index f31b171a4102..a38e19b61c1d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -183,6 +183,7 @@ gk110_gr_gpccs_ucode = { static const struct gf100_gr_func gk110_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk110_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index d76dd178007f..1912c0bfd7ee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -103,6 +103,7 @@ gk110b_gr_pack_mmio[] = { static const struct gf100_gr_func gk110b_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk110b_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 14bbe6ed02a9..1fc258163f25 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -162,6 +162,7 @@ gk208_gr_gpccs_ucode = { static const struct gf100_gr_func gk208_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk208_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c index d1dc92999dc0..d6840dc81a29 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c +++ 
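/*
 * The hunks above convert every Fermi/Kepler variant to a per-chipset
 * .init_gpc_mmu hook, and gk104_gr_init() calls gr->func->init_gpc_mmu(gr)
 * in place of its open-coded GPC broadcast writes.  A sketch of the shared
 * helper, assuming it performs the same sequence the gk104 hunk removes
 * (the actual gf100_gr_init_gpc_mmu() body is outside this diff):
 */
void
gf100_gr_init_gpc_mmu(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_fb *fb = device->fb;

	/* Clear broadcast GPC MMU state... */
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
	/* ...then point it at the MMU write/read fault buffers. */
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8);
}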
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c @@ -59,7 +59,7 @@ void * nv20_gr_chan_dtor(struct nvkm_object *object) { struct nv20_gr_chan *chan = nv20_gr_chan(object); - nvkm_memory_del(&chan->inst); + nvkm_memory_unref(&chan->inst); return chan; } @@ -323,7 +323,7 @@ void * nv20_gr_dtor(struct nvkm_gr *base) { struct nv20_gr *gr = nv20_gr(base); - nvkm_memory_del(&gr->ctxtab); + nvkm_memory_unref(&gr->ctxtab); return gr; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h index cdf4501e3798..d0cb2b8846ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h @@ -19,6 +19,7 @@ void nv20_gr_tile(struct nvkm_gr *, int, struct nvkm_fb_tile *); int nv30_gr_init(struct nvkm_gr *); #define nv20_gr_chan(p) container_of((p), struct nv20_gr_chan, object) +#include <core/object.h> struct nv20_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h index 2812ed11f877..bee8ef2d5697 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h @@ -16,6 +16,7 @@ void nv40_gr_intr(struct nvkm_gr *); u64 nv40_gr_units(struct nvkm_gr *); #define nv40_gr_chan(p) container_of((p), struct nv40_gr_chan, object) +#include <core/object.h> struct nv40_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h index 45eec83a5969..1ab6ea436b70 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h @@ -19,6 +19,7 @@ u64 nv50_gr_units(struct nvkm_gr *); int g84_gr_tlb_flush(struct nvkm_gr *); #define nv50_gr_chan(p) container_of((p), struct nv50_gr_chan, object) +#include <core/object.h> struct nv50_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h index d3bb34fcdebf..f0d35beb58df 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h @@ -18,6 +18,7 @@ struct nv31_mpeg_func { }; #define nv31_mpeg_chan(p) container_of((p), struct nv31_mpeg_chan, object) +#include <core/object.h> struct nv31_mpeg_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c index 4e528851e9c0..6df880a39019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c @@ -24,6 +24,7 @@ #include "priv.h" #include <core/gpuobj.h> +#include <core/object.h> #include <subdev/timer.h> #include <nvif/class.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h index d7b81cbf82b5..4ff0475e776c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h @@ -67,6 +67,7 @@ struct nvkm_specdom { }; #define nvkm_perfdom(p) container_of((p), struct nvkm_perfdom, object) +#include <core/object.h> struct nvkm_perfdom { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h index 6608bf6c6842..b5be49f0ac56 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h @@ -1,9 +1,11 @@ #ifndef __NVKM_SW_CHAN_H__ #define __NVKM_SW_CHAN_H__ #define 
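/*
 * nvkm_memory_del() becomes nvkm_memory_unref() throughout these hunks:
 * memory objects are now reference-counted, so callers drop their reference
 * and clear the pointer rather than destroying the object outright.  A
 * minimal sketch of the usual shape of such a helper; the field and
 * release-function names here are hypothetical, not taken from nvkm:
 */
#include <linux/kref.h>

void
nvkm_memory_unref(struct nvkm_memory **pmemory)
{
	struct nvkm_memory *memory = *pmemory;
	if (memory) {
		/* hypothetical: the final put invokes the real destructor */
		kref_put(&memory->kref, nvkm_memory_release);
		*pmemory = NULL;
	}
}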
nvkm_sw_chan(p) container_of((p), struct nvkm_sw_chan, object) -#include "priv.h" +#include <core/object.h> #include <core/event.h> +#include "priv.h" + struct nvkm_sw_chan { const struct nvkm_sw_chan_func *func; struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h index 943ef4c10091..bcfff62131fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h @@ -1,7 +1,7 @@ #ifndef __NVKM_NVSW_H__ #define __NVKM_NVSW_H__ #define nvkm_nvsw(p) container_of((p), struct nvkm_nvsw, object) -#include "priv.h" +#include <core/object.h> struct nvkm_nvsw { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c index 06bdb67a0205..70549381e082 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c @@ -86,7 +86,7 @@ nvkm_xtensa_fini(struct nvkm_engine *engine, bool suspend) nvkm_wr32(device, base + 0xd94, 0); /* FIFO_CTRL */ if (!suspend) - nvkm_memory_del(&xtensa->gpu_fw); + nvkm_memory_unref(&xtensa->gpu_fw); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 1b7f48efd8b1..14be41f24155 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -60,7 +60,7 @@ nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port, } void -nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst) +nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *inst) { if (!falcon->func->bind_context) { nvkm_error(falcon->user, diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index 669c24028470..9def926f24d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -180,7 +180,7 @@ nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, } static void -nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) +nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *ctx) { u32 inst_loc; u32 fbif; @@ -216,7 +216,7 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6); /* Set context */ - switch (nvkm_memory_target(ctx->memory)) { + switch (nvkm_memory_target(ctx)) { case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break; case NVKM_MEM_TARGET_HOST: inst_loc = 2; break; case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break; @@ -228,7 +228,7 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) /* Enable context */ nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1); nvkm_falcon_wr32(falcon, 0x054, - ((ctx->addr >> 12) & 0xfffffff) | + ((nvkm_memory_addr(ctx) >> 12) & 0xfffffff) | (inst_loc << 28) | (1 << 30)); nvkm_falcon_mask(falcon, 0x090, 0x10000, 0x10000); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild index 1e138b337955..e5830453813d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild @@ -3,3 +3,5 @@ nvkm-y += nvkm/subdev/bar/nv50.o nvkm-y += nvkm/subdev/bar/g84.o nvkm-y += nvkm/subdev/bar/gf100.o nvkm-y += nvkm/subdev/bar/gk20a.o +nvkm-y += nvkm/subdev/bar/gm107.o +nvkm-y += nvkm/subdev/bar/gm20b.o diff --git 
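/*
 * nvkm_falcon_bind_context() now takes a bare nvkm_memory object instead of
 * an nvkm_gpuobj wrapper, so the v1 code above queries placement with
 * nvkm_memory_target() and the address with nvkm_memory_addr().  The
 * target-to-instance-location mapping it programs, restated as a small
 * helper (illustrative only, not part of the patch):
 */
static int
falcon_v1_inst_loc(struct nvkm_memory *ctx)
{
	switch (nvkm_memory_target(ctx)) {
	case NVKM_MEM_TARGET_VRAM: return 0; /* instance block in VRAM */
	case NVKM_MEM_TARGET_HOST: return 2; /* coherent system memory */
	case NVKM_MEM_TARGET_NCOH: return 3; /* non-coherent system memory */
	default:                   return -EINVAL;
	}
}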
a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c index c561d148cebc..9646adec57cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c @@ -30,39 +30,76 @@ nvkm_bar_flush(struct nvkm_bar *bar) bar->func->flush(bar); } -struct nvkm_vm * -nvkm_bar_kmap(struct nvkm_bar *bar) +struct nvkm_vmm * +nvkm_bar_bar1_vmm(struct nvkm_device *device) { - /* disallow kmap() until after vm has been bootstrapped */ - if (bar && bar->func->kmap && bar->subdev.oneinit) - return bar->func->kmap(bar); + return device->bar->func->bar1.vmm(device->bar); +} + +struct nvkm_vmm * +nvkm_bar_bar2_vmm(struct nvkm_device *device) +{ + /* Denies access to BAR2 when it's not initialised, used by INSTMEM + * to know when object access needs to go through the BAR0 window. + */ + struct nvkm_bar *bar = device->bar; + if (bar && bar->bar2) + return bar->func->bar2.vmm(bar); return NULL; } -int -nvkm_bar_umap(struct nvkm_bar *bar, u64 size, int type, struct nvkm_vma *vma) +void +nvkm_bar_bar2_fini(struct nvkm_device *device) { - return bar->func->umap(bar, size, type, vma); + struct nvkm_bar *bar = device->bar; + if (bar && bar->bar2) { + bar->func->bar2.fini(bar); + bar->bar2 = false; + } +} + +void +nvkm_bar_bar2_init(struct nvkm_device *device) +{ + struct nvkm_bar *bar = device->bar; + if (bar && bar->subdev.oneinit && !bar->bar2 && bar->func->bar2.init) { + bar->func->bar2.init(bar); + bar->func->bar2.wait(bar); + bar->bar2 = true; + } } static int -nvkm_bar_oneinit(struct nvkm_subdev *subdev) +nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_bar *bar = nvkm_bar(subdev); - return bar->func->oneinit(bar); + bar->func->bar1.fini(bar); + return 0; } static int nvkm_bar_init(struct nvkm_subdev *subdev) { struct nvkm_bar *bar = nvkm_bar(subdev); - return bar->func->init(bar); + bar->func->bar1.init(bar); + bar->func->bar1.wait(bar); + if (bar->func->init) + bar->func->init(bar); + return 0; +} + +static int +nvkm_bar_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_bar *bar = nvkm_bar(subdev); + return bar->func->oneinit(bar); } static void * nvkm_bar_dtor(struct nvkm_subdev *subdev) { struct nvkm_bar *bar = nvkm_bar(subdev); + nvkm_bar_bar2_fini(subdev->device); return bar->func->dtor(bar); } @@ -71,6 +108,7 @@ nvkm_bar = { .dtor = nvkm_bar_dtor, .oneinit = nvkm_bar_oneinit, .init = nvkm_bar_init, + .fini = nvkm_bar_fini, }; void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c index ef717136c838..87f26f54b481 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c @@ -44,8 +44,14 @@ g84_bar_func = { .dtor = nv50_bar_dtor, .oneinit = nv50_bar_oneinit, .init = nv50_bar_init, - .kmap = nv50_bar_kmap, - .umap = nv50_bar_umap, + .bar1.init = nv50_bar_bar1_init, + .bar1.fini = nv50_bar_bar1_fini, + .bar1.wait = nv50_bar_bar1_wait, + .bar1.vmm = nv50_bar_bar1_vmm, + .bar2.init = nv50_bar_bar2_init, + .bar2.fini = nv50_bar_bar2_fini, + .bar2.wait = nv50_bar_bar1_wait, + .bar2.vmm = nv50_bar_bar2_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c index 676c167c95b9..a3ba7f50198b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c @@ -23,39 +23,73 @@ */ #include "gf100.h" -#include <core/gpuobj.h> +#include <core/memory.h> #include 
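/*
 * The base.c rework above replaces kmap/umap with per-BAR init/fini/wait/vmm
 * hooks.  BAR2 is brought up on demand via nvkm_bar_bar2_init() once the
 * subdev has completed oneinit, and nvkm_bar_bar2_vmm() returns NULL before
 * then.  A sketch of how a client such as INSTMEM is expected to probe for
 * it (illustrative; the real instmem code is not part of this hunk):
 */
static bool
can_use_bar2(struct nvkm_device *device)
{
	/* NULL means BAR2 isn't up yet: fall back to the BAR0 window. */
	return nvkm_bar_bar2_vmm(device) != NULL;
}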
<core/option.h> #include <subdev/fb.h> #include <subdev/mmu.h> -static struct nvkm_vm * -gf100_bar_kmap(struct nvkm_bar *base) +struct nvkm_vmm * +gf100_bar_bar1_vmm(struct nvkm_bar *base) { - return gf100_bar(base)->bar[0].vm; + return gf100_bar(base)->bar[1].vmm; } -int -gf100_bar_umap(struct nvkm_bar *base, u64 size, int type, struct nvkm_vma *vma) +void +gf100_bar_bar1_wait(struct nvkm_bar *base) +{ + /* NFI why it's twice. */ + nvkm_bar_flush(base); + nvkm_bar_flush(base); +} + +void +gf100_bar_bar1_fini(struct nvkm_bar *bar) { + nvkm_mask(bar->subdev.device, 0x001704, 0x80000000, 0x00000000); +} + +void +gf100_bar_bar1_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); - return nvkm_vm_get(bar->bar[1].vm, size, type, NV_MEM_ACCESS_RW, vma); + const u32 addr = nvkm_memory_addr(bar->bar[1].inst) >> 12; + nvkm_wr32(device, 0x001704, 0x80000000 | addr); +} + +struct nvkm_vmm * +gf100_bar_bar2_vmm(struct nvkm_bar *base) +{ + return gf100_bar(base)->bar[0].vmm; +} + +void +gf100_bar_bar2_fini(struct nvkm_bar *bar) +{ + nvkm_mask(bar->subdev.device, 0x001714, 0x80000000, 0x00000000); +} + +void +gf100_bar_bar2_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct gf100_bar *bar = gf100_bar(base); + u32 addr = nvkm_memory_addr(bar->bar[0].inst) >> 12; + if (bar->bar2_halve) + addr |= 0x40000000; + nvkm_wr32(device, 0x001714, 0x80000000 | addr); } static int -gf100_bar_ctor_vm(struct gf100_bar *bar, struct gf100_bar_vm *bar_vm, - struct lock_class_key *key, int bar_nr) +gf100_bar_oneinit_bar(struct gf100_bar *bar, struct gf100_barN *bar_vm, + struct lock_class_key *key, int bar_nr) { struct nvkm_device *device = bar->base.subdev.device; - struct nvkm_vm *vm; resource_size_t bar_len; int ret; ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0, false, - &bar_vm->mem); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x8000, 0, false, NULL, &bar_vm->pgd); + &bar_vm->inst); if (ret) return ret; @@ -63,98 +97,64 @@ gf100_bar_ctor_vm(struct gf100_bar *bar, struct gf100_bar_vm *bar_vm, if (bar_nr == 3 && bar->bar2_halve) bar_len >>= 1; - ret = nvkm_vm_new(device, 0, bar_len, 0, key, &vm); + ret = nvkm_vmm_new(device, 0, bar_len, NULL, 0, key, + (bar_nr == 3) ? "bar2" : "bar1", &bar_vm->vmm); if (ret) return ret; - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar_vm->vmm->engref[NVKM_SUBDEV_BAR]); + bar_vm->vmm->debug = bar->base.subdev.debug; /* * Bootstrap page table lookup. 
*/ if (bar_nr == 3) { - ret = nvkm_vm_boot(vm, bar_len); - if (ret) { - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_boot(bar_vm->vmm); + if (ret) return ret; - } } - ret = nvkm_vm_ref(vm, &bar_vm->vm, bar_vm->pgd); - nvkm_vm_ref(NULL, &vm, NULL); - if (ret) - return ret; - - nvkm_kmap(bar_vm->mem); - nvkm_wo32(bar_vm->mem, 0x0200, lower_32_bits(bar_vm->pgd->addr)); - nvkm_wo32(bar_vm->mem, 0x0204, upper_32_bits(bar_vm->pgd->addr)); - nvkm_wo32(bar_vm->mem, 0x0208, lower_32_bits(bar_len - 1)); - nvkm_wo32(bar_vm->mem, 0x020c, upper_32_bits(bar_len - 1)); - nvkm_done(bar_vm->mem); - return 0; + return nvkm_vmm_join(bar_vm->vmm, bar_vm->inst); } int gf100_bar_oneinit(struct nvkm_bar *base) { static struct lock_class_key bar1_lock; - static struct lock_class_key bar3_lock; + static struct lock_class_key bar2_lock; struct gf100_bar *bar = gf100_bar(base); int ret; - /* BAR3 */ - if (bar->base.func->kmap) { - ret = gf100_bar_ctor_vm(bar, &bar->bar[0], &bar3_lock, 3); + /* BAR2 */ + if (bar->base.func->bar2.init) { + ret = gf100_bar_oneinit_bar(bar, &bar->bar[0], &bar2_lock, 3); if (ret) return ret; + + bar->base.subdev.oneinit = true; + nvkm_bar_bar2_init(bar->base.subdev.device); } /* BAR1 */ - ret = gf100_bar_ctor_vm(bar, &bar->bar[1], &bar1_lock, 1); + ret = gf100_bar_oneinit_bar(bar, &bar->bar[1], &bar1_lock, 1); if (ret) return ret; return 0; } -int -gf100_bar_init(struct nvkm_bar *base) -{ - struct gf100_bar *bar = gf100_bar(base); - struct nvkm_device *device = bar->base.subdev.device; - u32 addr; - - nvkm_mask(device, 0x000200, 0x00000100, 0x00000000); - nvkm_mask(device, 0x000200, 0x00000100, 0x00000100); - - addr = nvkm_memory_addr(bar->bar[1].mem) >> 12; - nvkm_wr32(device, 0x001704, 0x80000000 | addr); - - if (bar->bar[0].mem) { - addr = nvkm_memory_addr(bar->bar[0].mem) >> 12; - if (bar->bar2_halve) - addr |= 0x40000000; - nvkm_wr32(device, 0x001714, 0x80000000 | addr); - } - - return 0; -} - void * gf100_bar_dtor(struct nvkm_bar *base) { struct gf100_bar *bar = gf100_bar(base); - nvkm_vm_ref(NULL, &bar->bar[1].vm, bar->bar[1].pgd); - nvkm_gpuobj_del(&bar->bar[1].pgd); - nvkm_memory_del(&bar->bar[1].mem); + nvkm_vmm_part(bar->bar[1].vmm, bar->bar[1].inst); + nvkm_vmm_unref(&bar->bar[1].vmm); + nvkm_memory_unref(&bar->bar[1].inst); - if (bar->bar[0].vm) { - nvkm_memory_del(&bar->bar[0].vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &bar->bar[0].vm, bar->bar[0].pgd); - } - nvkm_gpuobj_del(&bar->bar[0].pgd); - nvkm_memory_del(&bar->bar[0].mem); + nvkm_vmm_part(bar->bar[0].vmm, bar->bar[0].inst); + nvkm_vmm_unref(&bar->bar[0].vmm); + nvkm_memory_unref(&bar->bar[0].inst); return bar; } @@ -175,9 +175,14 @@ static const struct nvkm_bar_func gf100_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .init = gf100_bar_init, - .kmap = gf100_bar_kmap, - .umap = gf100_bar_umap, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gf100_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .bar2.init = gf100_bar_bar2_init, + .bar2.fini = gf100_bar_bar2_fini, + .bar2.wait = gf100_bar_bar1_wait, + .bar2.vmm = gf100_bar_bar2_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h index 20a5255362ba..e4da39139e95 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h @@ -3,22 +3,24 @@ #define gf100_bar(p) container_of((p), struct gf100_bar, base) #include "priv.h" -struct gf100_bar_vm { - struct nvkm_memory 
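/*
 * gf100_bar_oneinit_bar() above captures the shared per-BAR bring-up:
 * allocate a 0x1000-byte instance block, create a VMM spanning the BAR's
 * PCI resource (halving BAR2 if requested), boot the BAR2 page tables so
 * they are writable before BAR2 itself is live, then nvkm_vmm_join() the
 * VMM to the instance block in place of the old hand-written pgd/limit
 * words.  The enable registers then take just the instance address, as in
 * this sketch distilled from the bar1/bar2 init functions above:
 */
static void
bar_enable(struct nvkm_device *device, u32 reg, struct nvkm_memory *inst,
	   bool halve)
{
	u32 addr = nvkm_memory_addr(inst) >> 12;	/* 4KiB units */
	if (halve)
		addr |= 0x40000000;			/* halved BAR2 aperture */
	nvkm_wr32(device, reg, 0x80000000 | addr);	/* bit 31 enables */
}
/* On gf100, reg 0x001704 selects BAR1 and 0x001714 selects BAR2. */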
*mem; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; +struct gf100_barN { + struct nvkm_memory *inst; + struct nvkm_vmm *vmm; }; struct gf100_bar { struct nvkm_bar base; bool bar2_halve; - struct gf100_bar_vm bar[2]; + struct gf100_barN bar[2]; }; int gf100_bar_new_(const struct nvkm_bar_func *, struct nvkm_device *, int, struct nvkm_bar **); void *gf100_bar_dtor(struct nvkm_bar *); int gf100_bar_oneinit(struct nvkm_bar *); -int gf100_bar_init(struct nvkm_bar *); -int gf100_bar_umap(struct nvkm_bar *, u64, int, struct nvkm_vma *); +void gf100_bar_bar1_init(struct nvkm_bar *); +void gf100_bar_bar1_wait(struct nvkm_bar *); +struct nvkm_vmm *gf100_bar_bar1_vmm(struct nvkm_bar *); +void gf100_bar_bar2_init(struct nvkm_bar *); +struct nvkm_vmm *gf100_bar_bar2_vmm(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c index 9232fab4274c..b10077d38839 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c @@ -25,8 +25,10 @@ static const struct nvkm_bar_func gk20a_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .init = gf100_bar_init, - .umap = gf100_bar_umap, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gf100_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c new file mode 100644 index 000000000000..3ddf9222d935 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c @@ -0,0 +1,65 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "gf100.h" + +#include <subdev/timer.h> + +void +gm107_bar_bar1_wait(struct nvkm_bar *bar) +{ + struct nvkm_device *device = bar->subdev.device; + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x001710) & 0x00000003)) + break; + ); +} + +static void +gm107_bar_bar2_wait(struct nvkm_bar *bar) +{ + struct nvkm_device *device = bar->subdev.device; + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x001710) & 0x0000000c)) + break; + ); +} + +static const struct nvkm_bar_func +gm107_bar_func = { + .dtor = gf100_bar_dtor, + .oneinit = gf100_bar_oneinit, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gm107_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .bar2.init = gf100_bar_bar2_init, + .bar2.fini = gf100_bar_bar2_fini, + .bar2.wait = gm107_bar_bar2_wait, + .bar2.vmm = gf100_bar_bar2_vmm, + .flush = g84_bar_flush, +}; + +int +gm107_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +{ + return gf100_bar_new_(&gm107_bar_func, device, index, pbar); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c new file mode 100644 index 000000000000..950bff1955ad --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c @@ -0,0 +1,42 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
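/*
 * GM107 gains real completion status: instead of gf100's double flush, the
 * wait hooks above poll 0x001710 until the BAR1 (0x3) or BAR2 (0xc) busy
 * bits clear.  nvkm_msec() runs its body repeatedly for up to the given
 * number of milliseconds and evaluates negative on timeout (the removed
 * nv50_bar_init() further below uses the same pattern), so a caller that
 * must report failure can write:
 */
static int
bar_wait_idle(struct nvkm_device *device, u32 mask)
{
	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x001710) & mask))
			break;
	) < 0)
		return -EBUSY;	/* still busy after the 2s poll */
	return 0;
}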
+ */ +#include "gf100.h" + +static const struct nvkm_bar_func +gm20b_bar_func = { + .dtor = gf100_bar_dtor, + .oneinit = gf100_bar_oneinit, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gm107_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .flush = g84_bar_flush, +}; + +int +gm20b_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +{ + int ret = gf100_bar_new_(&gm20b_bar_func, device, index, pbar); + if (ret == 0) + (*pbar)->iomap_uncached = true; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c index 6eff637ac301..157b076a1272 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c @@ -28,19 +28,6 @@ #include <subdev/mmu.h> #include <subdev/timer.h> -struct nvkm_vm * -nv50_bar_kmap(struct nvkm_bar *base) -{ - return nv50_bar(base)->bar3_vm; -} - -int -nv50_bar_umap(struct nvkm_bar *base, u64 size, int type, struct nvkm_vma *vma) -{ - struct nv50_bar *bar = nv50_bar(base); - return nvkm_vm_get(bar->bar1_vm, size, type, NV_MEM_ACCESS_RW, vma); -} - static void nv50_bar_flush(struct nvkm_bar *base) { @@ -56,14 +43,72 @@ nv50_bar_flush(struct nvkm_bar *base) spin_unlock_irqrestore(&bar->base.lock, flags); } +struct nvkm_vmm * +nv50_bar_bar1_vmm(struct nvkm_bar *base) +{ + return nv50_bar(base)->bar1_vmm; +} + +void +nv50_bar_bar1_wait(struct nvkm_bar *base) +{ + nvkm_bar_flush(base); +} + +void +nv50_bar_bar1_fini(struct nvkm_bar *bar) +{ + nvkm_wr32(bar->subdev.device, 0x001708, 0x00000000); +} + +void +nv50_bar_bar1_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct nv50_bar *bar = nv50_bar(base); + nvkm_wr32(device, 0x001708, 0x80000000 | bar->bar1->node->offset >> 4); +} + +struct nvkm_vmm * +nv50_bar_bar2_vmm(struct nvkm_bar *base) +{ + return nv50_bar(base)->bar2_vmm; +} + +void +nv50_bar_bar2_fini(struct nvkm_bar *bar) +{ + nvkm_wr32(bar->subdev.device, 0x00170c, 0x00000000); +} + +void +nv50_bar_bar2_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct nv50_bar *bar = nv50_bar(base); + nvkm_wr32(device, 0x001704, 0x00000000 | bar->mem->addr >> 12); + nvkm_wr32(device, 0x001704, 0x40000000 | bar->mem->addr >> 12); + nvkm_wr32(device, 0x00170c, 0x80000000 | bar->bar2->node->offset >> 4); +} + +void +nv50_bar_init(struct nvkm_bar *base) +{ + struct nv50_bar *bar = nv50_bar(base); + struct nvkm_device *device = bar->base.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x001900 + (i * 4), 0x00000000); +} + int nv50_bar_oneinit(struct nvkm_bar *base) { struct nv50_bar *bar = nv50_bar(base); struct nvkm_device *device = bar->base.subdev.device; static struct lock_class_key bar1_lock; - static struct lock_class_key bar3_lock; - struct nvkm_vm *vm; + static struct lock_class_key bar2_lock; u64 start, limit; int ret; @@ -80,51 +125,54 @@ nv50_bar_oneinit(struct nvkm_bar *base) if (ret) return ret; - /* BAR3 */ + /* BAR2 */ start = 0x0100000000ULL; limit = start + device->func->resource_size(device, 3); - ret = nvkm_vm_new(device, start, limit - start, start, &bar3_lock, &vm); + ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, + &bar2_lock, "bar2", &bar->bar2_vmm); if (ret) return ret; - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar->bar2_vmm->engref[NVKM_SUBDEV_BAR]); + bar->bar2_vmm->debug = bar->base.subdev.debug; - ret = nvkm_vm_boot(vm, limit-- - start); + ret = 
nvkm_vmm_boot(bar->bar2_vmm); if (ret) return ret; - ret = nvkm_vm_ref(vm, &bar->bar3_vm, bar->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(bar->bar2_vmm, bar->mem->memory); if (ret) return ret; - ret = nvkm_gpuobj_new(device, 24, 16, false, bar->mem, &bar->bar3); + ret = nvkm_gpuobj_new(device, 24, 16, false, bar->mem, &bar->bar2); if (ret) return ret; - nvkm_kmap(bar->bar3); - nvkm_wo32(bar->bar3, 0x00, 0x7fc00000); - nvkm_wo32(bar->bar3, 0x04, lower_32_bits(limit)); - nvkm_wo32(bar->bar3, 0x08, lower_32_bits(start)); - nvkm_wo32(bar->bar3, 0x0c, upper_32_bits(limit) << 24 | + nvkm_kmap(bar->bar2); + nvkm_wo32(bar->bar2, 0x00, 0x7fc00000); + nvkm_wo32(bar->bar2, 0x04, lower_32_bits(limit)); + nvkm_wo32(bar->bar2, 0x08, lower_32_bits(start)); + nvkm_wo32(bar->bar2, 0x0c, upper_32_bits(limit) << 24 | upper_32_bits(start)); - nvkm_wo32(bar->bar3, 0x10, 0x00000000); - nvkm_wo32(bar->bar3, 0x14, 0x00000000); - nvkm_done(bar->bar3); + nvkm_wo32(bar->bar2, 0x10, 0x00000000); + nvkm_wo32(bar->bar2, 0x14, 0x00000000); + nvkm_done(bar->bar2); + + bar->base.subdev.oneinit = true; + nvkm_bar_bar2_init(device); /* BAR1 */ start = 0x0000000000ULL; limit = start + device->func->resource_size(device, 1); - ret = nvkm_vm_new(device, start, limit-- - start, start, &bar1_lock, &vm); - if (ret) - return ret; + ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, + &bar1_lock, "bar1", &bar->bar1_vmm); - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar->bar1_vmm->engref[NVKM_SUBDEV_BAR]); + bar->bar1_vmm->debug = bar->base.subdev.debug; - ret = nvkm_vm_ref(vm, &bar->bar1_vm, bar->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(bar->bar1_vmm, bar->mem->memory); if (ret) return ret; @@ -144,45 +192,21 @@ nv50_bar_oneinit(struct nvkm_bar *base) return 0; } -int -nv50_bar_init(struct nvkm_bar *base) -{ - struct nv50_bar *bar = nv50_bar(base); - struct nvkm_device *device = bar->base.subdev.device; - int i; - - nvkm_mask(device, 0x000200, 0x00000100, 0x00000000); - nvkm_mask(device, 0x000200, 0x00000100, 0x00000100); - nvkm_wr32(device, 0x100c80, 0x00060001); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x001704, 0x00000000 | bar->mem->addr >> 12); - nvkm_wr32(device, 0x001704, 0x40000000 | bar->mem->addr >> 12); - nvkm_wr32(device, 0x001708, 0x80000000 | bar->bar1->node->offset >> 4); - nvkm_wr32(device, 0x00170c, 0x80000000 | bar->bar3->node->offset >> 4); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x001900 + (i * 4), 0x00000000); - return 0; -} - void * nv50_bar_dtor(struct nvkm_bar *base) { struct nv50_bar *bar = nv50_bar(base); - nvkm_gpuobj_del(&bar->bar1); - nvkm_vm_ref(NULL, &bar->bar1_vm, bar->pgd); - nvkm_gpuobj_del(&bar->bar3); - if (bar->bar3_vm) { - nvkm_memory_del(&bar->bar3_vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &bar->bar3_vm, bar->pgd); + if (bar->mem) { + nvkm_gpuobj_del(&bar->bar1); + nvkm_vmm_part(bar->bar1_vmm, bar->mem->memory); + nvkm_vmm_unref(&bar->bar1_vmm); + nvkm_gpuobj_del(&bar->bar2); + nvkm_vmm_part(bar->bar2_vmm, bar->mem->memory); + nvkm_vmm_unref(&bar->bar2_vmm); + nvkm_gpuobj_del(&bar->pgd); + nvkm_gpuobj_del(&bar->pad); + nvkm_gpuobj_del(&bar->mem); } - nvkm_gpuobj_del(&bar->pgd); - nvkm_gpuobj_del(&bar->pad); - nvkm_gpuobj_del(&bar->mem); return bar; } @@ -204,8 +228,14 @@ nv50_bar_func = { .dtor = nv50_bar_dtor, .oneinit = nv50_bar_oneinit, .init = nv50_bar_init, - .kmap = nv50_bar_kmap, - .umap = nv50_bar_umap, + .bar1.init = 
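/*
 * The nvkm_wo32() stores above assemble the 24-byte DMA object through
 * which BAR2 accesses are translated on nv50.  Annotating the layout (the
 * field descriptions are inferred from the values written, not named in
 * the source):
 *
 *   0x00: 0x7fc00000                  ctxdma class/flags
 *   0x04: lower_32_bits(limit)        last addressable byte
 *   0x08: lower_32_bits(start)        first addressable byte
 *   0x0c: upper limit bits << 24 | upper start bits
 *   0x10, 0x14: 0x00000000
 */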
nv50_bar_bar1_init, + .bar1.fini = nv50_bar_bar1_fini, + .bar1.wait = nv50_bar_bar1_wait, + .bar1.vmm = nv50_bar_bar1_vmm, + .bar2.init = nv50_bar_bar2_init, + .bar2.fini = nv50_bar_bar2_fini, + .bar2.wait = nv50_bar_bar1_wait, + .bar2.vmm = nv50_bar_bar2_vmm, .flush = nv50_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h index 1eb764f22a49..140b76f588b6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h @@ -9,18 +9,20 @@ struct nv50_bar { struct nvkm_gpuobj *mem; struct nvkm_gpuobj *pad; struct nvkm_gpuobj *pgd; - struct nvkm_vm *bar1_vm; + struct nvkm_vmm *bar1_vmm; struct nvkm_gpuobj *bar1; - struct nvkm_vm *bar3_vm; - struct nvkm_gpuobj *bar3; + struct nvkm_vmm *bar2_vmm; + struct nvkm_gpuobj *bar2; }; int nv50_bar_new_(const struct nvkm_bar_func *, struct nvkm_device *, int, u32 pgd_addr, struct nvkm_bar **); void *nv50_bar_dtor(struct nvkm_bar *); int nv50_bar_oneinit(struct nvkm_bar *); -int nv50_bar_init(struct nvkm_bar *); -struct nvkm_vm *nv50_bar_kmap(struct nvkm_bar *); -int nv50_bar_umap(struct nvkm_bar *, u64, int, struct nvkm_vma *); -void nv50_bar_unmap(struct nvkm_bar *, struct nvkm_vma *); +void nv50_bar_init(struct nvkm_bar *); +void nv50_bar_bar1_init(struct nvkm_bar *); +void nv50_bar_bar1_wait(struct nvkm_bar *); +struct nvkm_vmm *nv50_bar_bar1_vmm(struct nvkm_bar *); +void nv50_bar_bar2_init(struct nvkm_bar *); +struct nvkm_vmm *nv50_bar_bar2_vmm(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h index d834ef20db5b..14398e2dbdf9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h @@ -9,11 +9,25 @@ void nvkm_bar_ctor(const struct nvkm_bar_func *, struct nvkm_device *, struct nvkm_bar_func { void *(*dtor)(struct nvkm_bar *); int (*oneinit)(struct nvkm_bar *); - int (*init)(struct nvkm_bar *); - struct nvkm_vm *(*kmap)(struct nvkm_bar *); - int (*umap)(struct nvkm_bar *, u64 size, int type, struct nvkm_vma *); + void (*init)(struct nvkm_bar *); + + struct { + void (*init)(struct nvkm_bar *); + void (*fini)(struct nvkm_bar *); + void (*wait)(struct nvkm_bar *); + struct nvkm_vmm *(*vmm)(struct nvkm_bar *); + } bar1, bar2; + void (*flush)(struct nvkm_bar *); }; +void nv50_bar_bar1_fini(struct nvkm_bar *); +void nv50_bar_bar2_fini(struct nvkm_bar *); + void g84_bar_flush(struct nvkm_bar *); + +void gf100_bar_bar1_fini(struct nvkm_bar *); +void gf100_bar_bar2_fini(struct nvkm_bar *); + +void gm107_bar_bar1_wait(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c index 23caef8df17f..73e463ed55c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c @@ -99,7 +99,7 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense) rail->extdev_id = nvbios_rd08(bios, entry + 0x1); res_start = 0x5; break; - }; + } if (nvbios_extdev_parse(bios, rail->extdev_id, &extdev)) continue; @@ -115,7 +115,7 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense) default: rail->resistor_count = 0; break; - }; + } for (r = 0; r < rail->resistor_count; ++r) { rail->resistors[r].mohm = nvbios_rd08(bios, entry + res_start + r * 2); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index b58ee99f7bfc..9cc10e438b3d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -36,6 +36,8 @@ #include <subdev/i2c.h> #include <subdev/vga.h> +#include <linux/kernel.h> + #define bioslog(lvl, fmt, args...) do { \ nvkm_printk(init->subdev, lvl, info, "0x%08x[%c]: "fmt, \ init->offset, init_exec(init) ? \ @@ -2271,8 +2273,6 @@ static struct nvbios_init_opcode { [0xaa] = { init_reserved }, }; -#define init_opcode_nr (sizeof(init_opcode) / sizeof(init_opcode[0])) - int nvbios_exec(struct nvbios_init *init) { @@ -2281,7 +2281,8 @@ nvbios_exec(struct nvbios_init *init) init->nested++; while (init->offset) { u8 opcode = nvbios_rd08(bios, init->offset); - if (opcode >= init_opcode_nr || !init_opcode[opcode].exec) { + if (opcode >= ARRAY_SIZE(init_opcode) || + !init_opcode[opcode].exec) { error("unknown opcode 0x%02x\n", opcode); return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c index 7e83c3985020..20ff5173cf8f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c @@ -115,16 +115,21 @@ nvbios_timingEp(struct nvkm_bios *bios, int idx, switch (min_t(u8, *hdr, 25)) { case 25: p->timing_10_24 = nvbios_rd08(bios, data + 0x18); + /* fall through */ case 24: case 23: case 22: p->timing_10_21 = nvbios_rd08(bios, data + 0x15); + /* fall through */ case 21: p->timing_10_20 = nvbios_rd08(bios, data + 0x14); + /* fall through */ case 20: p->timing_10_CWL = nvbios_rd08(bios, data + 0x13); + /* fall through */ case 19: p->timing_10_18 = nvbios_rd08(bios, data + 0x12); + /* fall through */ case 18: case 17: p->timing_10_16 = nvbios_rd08(bios, data + 0x10); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c index 158977f8a6e6..c3dae05348eb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c @@ -119,11 +119,11 @@ powerctrl_1_shift(int chip_version, int reg) switch (reg) { case 0x680520: - shift += 4; + shift += 4; /* fall through */ case 0x680508: - shift += 4; + shift += 4; /* fall through */ case 0x680504: - shift += 4; + shift += 4; /* fall through */ case 0x680500: shift += 4; } @@ -245,11 +245,11 @@ setPLL_double_highregs(struct nvkm_devinit *init, u32 reg1, switch (reg1) { case 0x680504: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680500: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680520: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680508: shift_c040 += 2; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c index a7049c041594..73b5d46104bd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c @@ -31,12 +31,6 @@ #include <engine/gr.h> #include <engine/mpeg.h> -bool -nvkm_fb_memtype_valid(struct nvkm_fb *fb, u32 memtype) -{ - return fb->func->memtype_valid(fb, memtype); -} - void nvkm_fb_tile_fini(struct nvkm_fb *fb, int region, struct nvkm_fb_tile *tile) { @@ -100,6 +94,7 @@ static int nvkm_fb_oneinit(struct nvkm_subdev *subdev) { struct nvkm_fb *fb = nvkm_fb(subdev); + u32 tags = 0; if (fb->func->ram_new) { int ret = fb->func->ram_new(fb, &fb->ram); @@ -115,7 +110,16 @@ nvkm_fb_oneinit(struct nvkm_subdev *subdev) return ret; } - return 0; + 
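/*
 * Two cleanup patterns recur in the bios/devinit hunks above: hand-rolled
 * element counts give way to ARRAY_SIZE() (hence the new <linux/kernel.h>
 * include in init.c), and deliberate switch fall-throughs get explicit
 * annotations for -Wimplicit-fallthrough.  The ARRAY_SIZE idiom in
 * miniature:
 */
#include <linux/kernel.h>

static const char *const opcode_names[] = { "nop", "copy", "jump" };

static const char *
opcode_name(unsigned int op)
{
	if (op >= ARRAY_SIZE(opcode_names))	/* count tracks the array */
		return NULL;
	return opcode_names[op];
}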
/* Initialise compression tag allocator. + * + * LTC oneinit() will override this on Fermi and newer. + */ + if (fb->func->tags) { + tags = fb->func->tags(fb); + nvkm_debug(subdev, "%d comptags\n", tags); + } + + return nvkm_mm_init(&fb->tags, 0, 0, tags, 1); } static int @@ -135,8 +139,13 @@ nvkm_fb_init(struct nvkm_subdev *subdev) if (fb->func->init) fb->func->init(fb); - if (fb->func->init_page) - fb->func->init_page(fb); + + if (fb->func->init_page) { + ret = fb->func->init_page(fb); + if (WARN_ON(ret)) + return ret; + } + if (fb->func->init_unkn) fb->func->init_unkn(fb); return 0; @@ -148,12 +157,13 @@ nvkm_fb_dtor(struct nvkm_subdev *subdev) struct nvkm_fb *fb = nvkm_fb(subdev); int i; - nvkm_memory_del(&fb->mmu_wr); - nvkm_memory_del(&fb->mmu_rd); + nvkm_memory_unref(&fb->mmu_wr); + nvkm_memory_unref(&fb->mmu_rd); for (i = 0; i < fb->tile.regions; i++) fb->func->tile.fini(fb, i, &fb->tile.region[i]); + nvkm_mm_fini(&fb->tags); nvkm_ram_del(&fb->ram); if (fb->func->dtor) @@ -176,7 +186,8 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device, nvkm_subdev_ctor(&nvkm_fb, device, index, &fb->subdev); fb->func = func; fb->tile.regions = fb->func->tile.regions; - fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", 0); + fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", + fb->func->default_bigpage); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c index 9c28392d07e4..06bf95c0c549 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c @@ -27,6 +27,7 @@ static const struct nv50_fb_func g84_fb = { .ram_new = nv50_ram_new, + .tags = nv20_fb_tags, .trap = 0x001d07ff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c index a239e73562c8..47d28c279707 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c @@ -27,15 +27,6 @@ #include <core/memory.h> #include <core/option.h> -extern const u8 gf100_pte_storage_type_map[256]; - -bool -gf100_fb_memtype_valid(struct nvkm_fb *fb, u32 tile_flags) -{ - u8 memtype = (tile_flags & 0x0000ff00) >> 8; - return likely((gf100_pte_storage_type_map[memtype] != 0xff)); -} - void gf100_fb_intr(struct nvkm_fb *base) { @@ -80,20 +71,17 @@ gf100_fb_oneinit(struct nvkm_fb *base) return 0; } -void +int gf100_fb_init_page(struct nvkm_fb *fb) { struct nvkm_device *device = fb->subdev.device; switch (fb->page) { - case 16: - nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001); - break; - case 17: + case 16: nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001); break; + case 17: nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); break; default: - nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); - fb->page = 17; - break; + return -EINVAL; } + return 0; } void @@ -143,7 +131,7 @@ gf100_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gf100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h index 412eb89834e8..e3cf0515bb70 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h @@ -17,7 +17,5 @@ void gf100_fb_intr(struct nvkm_fb *); void gp100_fb_init(struct nvkm_fb *); -void gm200_fb_init_page(struct nvkm_fb *fb); void gm200_fb_init(struct nvkm_fb *base); - #endif diff --git 
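/*
 * The compression-tag pool moves from nvkm_ram to the fb itself: oneinit
 * sizes it through the new per-chipset ->tags() hook and always initialises
 * the nvkm_mm, so on chipsets without tags later allocations simply fail.
 * Allocation and release, following the tile code further below (helper
 * names are illustrative):
 */
static int
alloc_comptags(struct nvkm_fb *fb, u32 count, struct nvkm_mm_node **ptag)
{
	/* heap 0, type 1, fixed size, alignment 1 -- as the tile code uses */
	return nvkm_mm_head(&fb->tags, 0, 1, count, count, 1, ptag);
}

static void
free_comptags(struct nvkm_fb *fb, struct nvkm_mm_node **ptag)
{
	nvkm_mm_free(&fb->tags, ptag);
}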
a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c index 56af84aa333b..4a9f463745b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c @@ -32,7 +32,7 @@ gf108_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gf108_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c index 4245e2e6e604..0a6e8eaad42c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c @@ -32,7 +32,7 @@ gk104_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gk104_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c index 5d34d6136616..a7e29b125094 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c @@ -30,7 +30,7 @@ gk20a_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c index db699025f546..69c876d5d1c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c @@ -32,7 +32,7 @@ gm107_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gm107_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c index d83da5ddbc1e..8137e19d3292 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c @@ -26,22 +26,18 @@ #include <core/memory.h> -void +int gm200_fb_init_page(struct nvkm_fb *fb) { struct nvkm_device *device = fb->subdev.device; switch (fb->page) { - case 16: - nvkm_mask(device, 0x100c80, 0x00000801, 0x00000001); - break; - case 17: - nvkm_mask(device, 0x100c80, 0x00000801, 0x00000000); - break; + case 16: nvkm_mask(device, 0x100c80, 0x00001801, 0x00001001); break; + case 17: nvkm_mask(device, 0x100c80, 0x00001801, 0x00000000); break; + case 0: nvkm_mask(device, 0x100c80, 0x00001800, 0x00001800); break; default: - nvkm_mask(device, 0x100c80, 0x00000800, 0x00000800); - fb->page = 0; - break; + return -EINVAL; } + return 0; } void @@ -69,7 +65,7 @@ gm200_fb = { .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, .ram_new = gm200_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 0 /* per-instance. */, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c index b87c233bcd6d..12db61e31128 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c @@ -30,7 +30,7 @@ gm20b_fb = { .init = gm200_fb_init, .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 0 /* per-instance. 
*/, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c index 98474aec1921..147f69b30cd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c @@ -59,7 +59,6 @@ gp100_fb = { .init_page = gm200_fb_init_page, .init_unkn = gp100_fb_init_unkn, .ram_new = gp100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c index 73b4ae1c73dc..b84b9861ef26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c @@ -33,7 +33,6 @@ gp102_fb = { .init = gp100_fb_init, .init_page = gm200_fb_init_page, .ram_new = gp100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c index f2b1fbf428d5..af8e43979dc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c @@ -28,7 +28,6 @@ gp10b_fb = { .init = gm200_fb_init, .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c index ebb30608d5ef..9266559b45f9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c @@ -27,6 +27,7 @@ static const struct nv50_fb_func gt215_fb = { .ram_new = gt215_ram_new, + .tags = nv20_fb_tags, .trap = 0x000d0fff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c index 8ff2e5db4571..c886664533c8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c @@ -25,14 +25,6 @@ #include "ram.h" #include "regsnv04.h" -bool -nv04_fb_memtype_valid(struct nvkm_fb *fb, u32 tile_flags) -{ - if (!(tile_flags & 0xff00)) - return true; - return false; -} - static void nv04_fb_init(struct nvkm_fb *fb) { @@ -49,7 +41,6 @@ static const struct nvkm_fb_func nv04_fb = { .init = nv04_fb_init, .ram_new = nv04_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c index e8c44f5a3d84..c998b7e96aa3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c @@ -61,7 +61,6 @@ nv10_fb = { .tile.fini = nv10_fb_tile_fini, .tile.prog = nv10_fb_tile_prog, .ram_new = nv10_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c index 2ae0beb87567..7b9f04f44af8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c @@ -33,7 +33,6 @@ nv1a_fb = { .tile.fini = nv10_fb_tile_fini, .tile.prog = nv10_fb_tile_prog, .ram_new = nv1a_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c index 126865dfe777..a021d21ff153 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c @@ -45,7 +45,7 @@ nv20_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / 
fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (!(flags & 2)) tile->zcomp = 0x00000000; /* Z16 */ else tile->zcomp = 0x04000000; /* Z24S8 */ tile->zcomp |= tile->tag->offset; @@ -63,7 +63,7 @@ nv20_fb_tile_fini(struct nvkm_fb *fb, int i, struct nvkm_fb_tile *tile) tile->limit = 0; tile->pitch = 0; tile->zcomp = 0; - nvkm_mm_free(&fb->ram->tags, &tile->tag); + nvkm_mm_free(&fb->tags, &tile->tag); } void @@ -77,15 +77,22 @@ nv20_fb_tile_prog(struct nvkm_fb *fb, int i, struct nvkm_fb_tile *tile) nvkm_wr32(device, 0x100300 + (i * 0x04), tile->zcomp); } +u32 +nv20_fb_tags(struct nvkm_fb *fb) +{ + const u32 tags = nvkm_rd32(fb->subdev.device, 0x100320); + return tags ? tags + 1 : 0; +} + static const struct nvkm_fb_func nv20_fb = { + .tags = nv20_fb_tags, .tile.regions = 8, .tile.init = nv20_fb_tile_init, .tile.comp = nv20_fb_tile_comp, .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c index c56746d2a502..7709f5fe9a45 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c @@ -32,7 +32,7 @@ nv25_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (!(flags & 2)) tile->zcomp = 0x00100000; /* Z16 */ else tile->zcomp = 0x00200000; /* Z24S8 */ tile->zcomp |= tile->tag->offset; @@ -44,13 +44,13 @@ nv25_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv25_fb = { + .tags = nv20_fb_tags, .tile.regions = 8, .tile.init = nv20_fb_tile_init, .tile.comp = nv25_fb_tile_comp, .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c index 2a7c4831b821..8aa782666507 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c @@ -51,7 +51,7 @@ nv30_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x01000000; /* Z16 */ else tile->zcomp |= 0x02000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -116,6 +116,7 @@ nv30_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv30_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -123,7 +124,6 @@ nv30_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c index 1604b3789ad1..6e83dcff72e0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c @@ -32,7 +32,7 @@ nv35_fb_tile_comp(struct nvkm_fb *fb, int i, 
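/*
 * Worked example for the comptag sizing used in these tile_comp functions:
 * one tag covers a 0x40-byte tile and the pool is shared across memory
 * partitions.  For a hypothetical 1MiB tile region on a 2-partition board:
 *
 *   tiles = DIV_ROUND_UP(0x100000, 0x40) = 0x4000
 *   tags  = round_up(0x4000 / 2, 0x40)   = 0x2000
 *
 * so 8192 tags are reserved from fb->tags and the node's offset is folded
 * into the per-region zcomp value.
 */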
u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x04000000; /* Z16 */ else tile->zcomp |= 0x08000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -45,6 +45,7 @@ nv35_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv35_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv35_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c index 80cc0a6e3416..2a07617bb44c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c @@ -32,7 +32,7 @@ nv36_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x10000000; /* Z16 */ else tile->zcomp |= 0x20000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -45,6 +45,7 @@ nv36_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv36_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv36_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c index deec46a310f8..955160778b5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c @@ -33,7 +33,7 @@ nv40_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, u32 tiles = DIV_ROUND_UP(size, 0x80); u32 tags = round_up(tiles / fb->ram->parts, 0x100); if ( (flags & 2) && - !nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + !nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { tile->zcomp = 0x28000000; /* Z24S8_SPLIT_GRAD */ tile->zcomp |= ((tile->tag->offset ) >> 8); tile->zcomp |= ((tile->tag->offset + tags - 1) >> 8) << 13; @@ -51,6 +51,7 @@ nv40_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv40_fb = { + .tags = nv20_fb_tags, .init = nv40_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -58,7 +59,6 @@ nv40_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv40_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c index 79e57dd5a00f..b77f08d34cc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c @@ -45,6 +45,7 @@ nv41_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv41_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 12, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv41_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv41_ram_new, - .memtype_valid 
= nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c index 06246cce5ec4..b59dc486083d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c @@ -62,7 +62,6 @@ nv44_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c index 3598a1aa65be..cab7d20fa039 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c @@ -48,7 +48,6 @@ nv46_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c index c505e4429314..a8b0ad4c871d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c @@ -28,6 +28,7 @@ static const struct nvkm_fb_func nv47_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 15, .tile.init = nv30_fb_tile_init, @@ -35,7 +36,6 @@ nv47_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv41_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c index 7b91b9f170e5..d0b317bb0252 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c @@ -28,6 +28,7 @@ static const struct nvkm_fb_func nv49_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 15, .tile.init = nv30_fb_tile_init, @@ -35,7 +36,6 @@ nv49_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv49_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c index 4e98210c1b1c..6a6f0c086071 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c @@ -34,7 +34,6 @@ nv4e_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c index 0595e0722bfc..b2f5bf8144ea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c @@ -28,18 +28,6 @@ #include <core/enum.h> #include <engine/fifo.h> -int -nv50_fb_memtype[0x80] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 1, 0, 2, 0, 1, 0, 2, 0, 1, 1, 2, 2, 1, 1, 0, 0 -}; - static int nv50_fb_ram_new(struct nvkm_fb *base, struct nvkm_ram **pram) { @@ -47,12 +35,6 @@ nv50_fb_ram_new(struct nvkm_fb *base, struct nvkm_ram **pram) return fb->func->ram_new(&fb->base, pram); } -static bool -nv50_fb_memtype_valid(struct nvkm_fb *fb, u32 memtype) -{ - return nv50_fb_memtype[(memtype & 0xff00) >> 8] != 0; -} - static const 
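The nv35/nv36/nv40 hunks above only change where compression tags are allocated from (fb->tags instead of fb->ram->tags); the sizing math is untouched. A minimal userspace sketch of that math, with the kernel's DIV_ROUND_UP()/round_up() expanded and the Z24S8 encoding taken from the nv35 hunk; the tag offset used here is hypothetical:

	#include <stdint.h>
	#include <stdio.h>

	/* One tag covers a 0x40-byte tile; the per-partition tag count is
	 * padded up to a 0x40 boundary before being carved out of tag RAM.
	 */
	static uint32_t tags_for_region(uint32_t size, uint32_t parts)
	{
		uint32_t tiles = (size + 0x3f) / 0x40;    /* DIV_ROUND_UP(size, 0x40) */
		return ((tiles / parts) + 0x3f) & ~0x3fu; /* round_up(.., 0x40) */
	}

	int main(void)
	{
		uint32_t tag_offset = 0x1000;             /* hypothetical mm offset */
		uint32_t zcomp = 0x08000000;              /* Z24S8 on nv35, per the hunk */
		zcomp |= tag_offset >> 6;                 /* start tag in 64-byte units */
		printf("tags=%u zcomp=%08x\n", tags_for_region(1u << 20, 2), zcomp);
		return 0;
	}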
struct nvkm_enum vm_dispatch_subclients[] = { { 0x00000000, "GRCTX" }, { 0x00000001, "NOTIFY" }, @@ -244,6 +226,15 @@ nv50_fb_init(struct nvkm_fb *base) nvkm_wr32(device, 0x100c90, fb->func->trap); } +static u32 +nv50_fb_tags(struct nvkm_fb *base) +{ + struct nv50_fb *fb = nv50_fb(base); + if (fb->func->tags) + return fb->func->tags(&fb->base); + return 0; +} + static void * nv50_fb_dtor(struct nvkm_fb *base) { @@ -262,11 +253,11 @@ nv50_fb_dtor(struct nvkm_fb *base) static const struct nvkm_fb_func nv50_fb_ = { .dtor = nv50_fb_dtor, + .tags = nv50_fb_tags, .oneinit = nv50_fb_oneinit, .init = nv50_fb_init, .intr = nv50_fb_intr, .ram_new = nv50_fb_ram_new, - .memtype_valid = nv50_fb_memtype_valid, }; int @@ -287,6 +278,7 @@ nv50_fb_new_(const struct nv50_fb_func *func, struct nvkm_device *device, static const struct nv50_fb_func nv50_fb = { .ram_new = nv50_ram_new, + .tags = nv20_fb_tags, .trap = 0x000707ff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h index faa88c8c66fe..13231d4b00d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h @@ -12,10 +12,10 @@ struct nv50_fb { struct nv50_fb_func { int (*ram_new)(struct nvkm_fb *, struct nvkm_ram **); + u32 (*tags)(struct nvkm_fb *); u32 trap; }; int nv50_fb_new_(const struct nv50_fb_func *, struct nvkm_device *, int index, struct nvkm_fb **pfb); -extern int nv50_fb_memtype[0x80]; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index e905d44fa1d5..e05d95240e85 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -6,9 +6,10 @@ struct nvkm_bios; struct nvkm_fb_func { void *(*dtor)(struct nvkm_fb *); + u32 (*tags)(struct nvkm_fb *); int (*oneinit)(struct nvkm_fb *); void (*init)(struct nvkm_fb *); - void (*init_page)(struct nvkm_fb *); + int (*init_page)(struct nvkm_fb *); void (*init_unkn)(struct nvkm_fb *); void (*intr)(struct nvkm_fb *); @@ -24,7 +25,7 @@ struct nvkm_fb_func { int (*ram_new)(struct nvkm_fb *, struct nvkm_ram **); - bool (*memtype_valid)(struct nvkm_fb *, u32 memtype); + u8 default_bigpage; }; void nvkm_fb_ctor(const struct nvkm_fb_func *, struct nvkm_device *device, @@ -33,13 +34,12 @@ int nvkm_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *device, int index, struct nvkm_fb **); int nvkm_fb_bios_memtype(struct nvkm_bios *); -bool nv04_fb_memtype_valid(struct nvkm_fb *, u32 memtype); - void nv10_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nv10_fb_tile_fini(struct nvkm_fb *, int i, struct nvkm_fb_tile *); void nv10_fb_tile_prog(struct nvkm_fb *, int, struct nvkm_fb_tile *); +u32 nv20_fb_tags(struct nvkm_fb *); void nv20_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nv20_fb_tile_fini(struct nvkm_fb *, int i, struct nvkm_fb_tile *); @@ -62,8 +62,7 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); int gf100_fb_oneinit(struct nvkm_fb *); -void gf100_fb_init_page(struct nvkm_fb *); -bool gf100_fb_memtype_valid(struct nvkm_fb *, u32); +int gf100_fb_init_page(struct nvkm_fb *); -void gm200_fb_init_page(struct nvkm_fb *); +int gm200_fb_init_page(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c index 
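A sketch of the optional ->tags hook that priv.h gains above: the base layer calls it if present, and nv50_fb_tags() forwards to the chip-specific nv50_fb_func table, defaulting to 0 when a chipset has no tag memory. Stand-in types, not the real nvkm structures:

	struct fb;
	struct fb_func {
		unsigned (*tags)(struct fb *);  /* optional, like nvkm_fb_func.tags */
	};
	struct fb {
		const struct fb_func *func;
	};

	/* Mirrors the delegation in nv50_fb_tags() above. */
	static unsigned fb_tags(struct fb *fb)
	{
		if (fb->func->tags)
			return fb->func->tags(fb);
		return 0; /* no compression tags on this chipset */
	}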
c17d559dbfbe..24c7bd505731 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c @@ -21,8 +21,132 @@ * * Authors: Ben Skeggs <bskeggs@redhat.com> */ +#define nvkm_vram(p) container_of((p), struct nvkm_vram, memory) #include "ram.h" +#include <core/memory.h> +#include <subdev/mmu.h> + +struct nvkm_vram { + struct nvkm_memory memory; + struct nvkm_ram *ram; + u8 page; + struct nvkm_mm_node *mn; +}; + +static int +nvkm_vram_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + struct nvkm_vmm_map map = { + .memory = &vram->memory, + .offset = offset, + .mem = vram->mn, + }; + + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static u64 +nvkm_vram_size(struct nvkm_memory *memory) +{ + return (u64)nvkm_mm_size(nvkm_vram(memory)->mn) << NVKM_RAM_MM_SHIFT; +} + +static u64 +nvkm_vram_addr(struct nvkm_memory *memory) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + if (!nvkm_mm_contiguous(vram->mn)) + return ~0ULL; + return (u64)nvkm_mm_addr(vram->mn) << NVKM_RAM_MM_SHIFT; +} + +static u8 +nvkm_vram_page(struct nvkm_memory *memory) +{ + return nvkm_vram(memory)->page; +} + +static enum nvkm_memory_target +nvkm_vram_target(struct nvkm_memory *memory) +{ + return NVKM_MEM_TARGET_VRAM; +} + +static void * +nvkm_vram_dtor(struct nvkm_memory *memory) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + struct nvkm_mm_node *next = vram->mn; + struct nvkm_mm_node *node; + mutex_lock(&vram->ram->fb->subdev.mutex); + while ((node = next)) { + next = node->next; + nvkm_mm_free(&vram->ram->vram, &node); + } + mutex_unlock(&vram->ram->fb->subdev.mutex); + return vram; +} + +static const struct nvkm_memory_func +nvkm_vram = { + .dtor = nvkm_vram_dtor, + .target = nvkm_vram_target, + .page = nvkm_vram_page, + .addr = nvkm_vram_addr, + .size = nvkm_vram_size, + .map = nvkm_vram_map, +}; + +int +nvkm_ram_get(struct nvkm_device *device, u8 heap, u8 type, u8 rpage, u64 size, + bool contig, bool back, struct nvkm_memory **pmemory) +{ + struct nvkm_ram *ram; + struct nvkm_mm *mm; + struct nvkm_mm_node **node, *r; + struct nvkm_vram *vram; + u8 page = max(rpage, (u8)NVKM_RAM_MM_SHIFT); + u32 align = (1 << page) >> NVKM_RAM_MM_SHIFT; + u32 max = ALIGN(size, 1 << page) >> NVKM_RAM_MM_SHIFT; + u32 min = contig ? 
max : align; + int ret; + + if (!device->fb || !(ram = device->fb->ram)) + return -ENODEV; + ram = device->fb->ram; + mm = &ram->vram; + + if (!(vram = kzalloc(sizeof(*vram), GFP_KERNEL))) + return -ENOMEM; + nvkm_memory_ctor(&nvkm_vram, &vram->memory); + vram->ram = ram; + vram->page = page; + *pmemory = &vram->memory; + + mutex_lock(&ram->fb->subdev.mutex); + node = &vram->mn; + do { + if (back) + ret = nvkm_mm_tail(mm, heap, type, max, min, align, &r); + else + ret = nvkm_mm_head(mm, heap, type, max, min, align, &r); + if (ret) { + mutex_unlock(&ram->fb->subdev.mutex); + nvkm_memory_unref(pmemory); + return ret; + } + + *node = r; + node = &r->next; + max -= r->length; + } while (max); + mutex_unlock(&ram->fb->subdev.mutex); + return 0; +} + int nvkm_ram_init(struct nvkm_ram *ram) { @@ -38,7 +162,6 @@ nvkm_ram_del(struct nvkm_ram **pram) if (ram && !WARN_ON(!ram->func)) { if (ram->func->dtor) *pram = ram->func->dtor(ram); - nvkm_mm_fini(&ram->tags); nvkm_mm_fini(&ram->vram); kfree(*pram); *pram = NULL; @@ -47,8 +170,7 @@ nvkm_ram_del(struct nvkm_ram **pram) int nvkm_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - enum nvkm_ram_type type, u64 size, u32 tags, - struct nvkm_ram *ram) + enum nvkm_ram_type type, u64 size, struct nvkm_ram *ram) { static const char *name[] = { [NVKM_RAM_TYPE_UNKNOWN] = "of unknown memory type", @@ -73,28 +195,20 @@ nvkm_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, ram->size = size; if (!nvkm_mm_initialised(&ram->vram)) { - ret = nvkm_mm_init(&ram->vram, 0, size >> NVKM_RAM_MM_SHIFT, 1); + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, 0, + size >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; } - if (!nvkm_mm_initialised(&ram->tags)) { - ret = nvkm_mm_init(&ram->tags, 0, tags ? ++tags : 0, 1); - if (ret) - return ret; - - nvkm_debug(subdev, "%d compression tags\n", tags); - } - return 0; } int nvkm_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - enum nvkm_ram_type type, u64 size, u32 tags, - struct nvkm_ram **pram) + enum nvkm_ram_type type, u64 size, struct nvkm_ram **pram) { if (!(*pram = kzalloc(sizeof(**pram), GFP_KERNEL))) return -ENOMEM; - return nvkm_ram_ctor(func, fb, type, size, tags, *pram); + return nvkm_ram_ctor(func, fb, type, size, *pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index fac7e73c3ddf..70fd59dcd06d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -3,11 +3,9 @@ #include "priv.h" int nvkm_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, - enum nvkm_ram_type, u64 size, u32 tags, - struct nvkm_ram *); + enum nvkm_ram_type, u64 size, struct nvkm_ram *); int nvkm_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, - enum nvkm_ram_type, u64 size, u32 tags, - struct nvkm_ram **); + enum nvkm_ram_type, u64 size, struct nvkm_ram **); void nvkm_ram_del(struct nvkm_ram **); int nvkm_ram_init(struct nvkm_ram *); @@ -15,9 +13,6 @@ extern const struct nvkm_ram_func nv04_ram_func; int nv50_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, struct nvkm_ram *); -int nv50_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); -void nv50_ram_put(struct nvkm_ram *, struct nvkm_mem **); -void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *); int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, struct nvkm_ram **); @@ -28,8 +23,6 @@ u32 gf100_ram_probe_fbp(const struct nvkm_ram_func *, u32 gf100_ram_probe_fbp_amount(const 
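The heart of the new nvkm_ram_get() above is its node-chaining loop: the request is satisfied by however many allocator spans it takes, linked through ->next, and a contiguous request sets min == max so the first span must cover everything. A self-contained sketch with a toy span allocator standing in for nvkm_mm_head()/nvkm_mm_tail():

	#include <stdio.h>
	#include <stdlib.h>

	struct node { unsigned length; struct node *next; };

	/* Toy allocator: grants at most 4 units per span. */
	static int alloc_span(unsigned max, unsigned min, struct node **r)
	{
		unsigned grant = max < 4 ? max : 4;
		if (grant < min)
			return -1;            /* cannot satisfy the minimum */
		*r = malloc(sizeof(**r));
		if (!*r)
			return -1;
		(*r)->length = grant;
		(*r)->next = NULL;
		return 0;
	}

	static int get_chunked(unsigned max, unsigned min, struct node **head)
	{
		struct node **link = head, *r;
		do {
			if (alloc_span(max, min, &r))
				return -1;    /* caller frees any partial chain */
			*link = r;            /* append span to the chain */
			link = &r->next;
			max -= r->length;
		} while (max);                /* loop until fully satisfied */
		return 0;
	}

	int main(void)
	{
		struct node *head = NULL, *n;
		if (get_chunked(10, 1, &head) == 0)
			for (n = head; n; n = n->next)
				printf("span of %u units\n", n->length);
		return 0;
	}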
struct nvkm_ram_func *, u32, struct nvkm_device *, int, int *); u32 gf100_ram_probe_fbpa_amount(struct nvkm_device *, int); -int gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); -void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **); int gf100_ram_init(struct nvkm_ram *); int gf100_ram_calc(struct nvkm_ram *, u32); int gf100_ram_prog(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c index 4a9bd4f1cb93..ac87a3b6b7c9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c @@ -32,7 +32,6 @@ #include <subdev/bios/timing.h> #include <subdev/clk.h> #include <subdev/clk/pll.h> -#include <subdev/ltc.h> struct gf100_ramfuc { struct ramfuc base; @@ -420,86 +419,6 @@ gf100_ram_tidy(struct nvkm_ram *base) ram_exec(&ram->fuc, false); } -void -gf100_ram_put(struct nvkm_ram *ram, struct nvkm_mem **pmem) -{ - struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc; - struct nvkm_mem *mem = *pmem; - - *pmem = NULL; - if (unlikely(mem == NULL)) - return; - - mutex_lock(&ram->fb->subdev.mutex); - if (mem->tag) - nvkm_ltc_tags_free(ltc, &mem->tag); - __nv50_ram_put(ram, mem); - mutex_unlock(&ram->fb->subdev.mutex); - - kfree(mem); -} - -int -gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, - u32 memtype, struct nvkm_mem **pmem) -{ - struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc; - struct nvkm_mm *mm = &ram->vram; - struct nvkm_mm_node **node, *r; - struct nvkm_mem *mem; - int type = (memtype & 0x0ff); - int back = (memtype & 0x800); - const bool comp = gf100_pte_storage_type_map[type] != type; - int ret; - - size >>= NVKM_RAM_MM_SHIFT; - align >>= NVKM_RAM_MM_SHIFT; - ncmin >>= NVKM_RAM_MM_SHIFT; - if (!ncmin) - ncmin = size; - - mem = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem) - return -ENOMEM; - - mem->size = size; - - mutex_lock(&ram->fb->subdev.mutex); - if (comp) { - /* compression only works with lpages */ - if (align == (1 << (17 - NVKM_RAM_MM_SHIFT))) { - int n = size >> 5; - nvkm_ltc_tags_alloc(ltc, n, &mem->tag); - } - - if (unlikely(!mem->tag)) - type = gf100_pte_storage_type_map[type]; - } - mem->memtype = type; - - node = &mem->mem; - do { - if (back) - ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r); - else - ret = nvkm_mm_head(mm, 0, 1, size, ncmin, align, &r); - if (ret) { - mutex_unlock(&ram->fb->subdev.mutex); - ram->func->put(ram, &mem); - return ret; - } - - *node = r; - node = &r->next; - size -= r->length; - } while (size); - mutex_unlock(&ram->fb->subdev.mutex); - - mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; - *pmem = mem; - return 0; -} - int gf100_ram_init(struct nvkm_ram *base) { @@ -604,7 +523,7 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, nvkm_debug(subdev, "Upper: %4lld MiB @ %010llx\n", usize >> 20, ubase); nvkm_debug(subdev, "Total: %4lld MiB\n", total >> 20); - ret = nvkm_ram_ctor(func, fb, type, total, 0, ram); + ret = nvkm_ram_ctor(func, fb, type, total, ram); if (ret) return ret; @@ -617,7 +536,8 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, */ if (lower != total) { /* The common memory amount is addressed normally. 
*/ - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (lower - rsvd_head) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; @@ -625,13 +545,15 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, /* And the rest is much higher in the physical address * space, and may not be usable for certain operations. */ - ret = nvkm_mm_init(&ram->vram, ubase >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_MIXED, + ubase >> NVKM_RAM_MM_SHIFT, (usize - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; } else { /* GPUs without mixed-memory are a lot nicer... */ - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (total - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); if (ret) @@ -738,8 +660,6 @@ gf100_ram = { .probe_fbp_amount = gf100_ram_probe_fbp_amount, .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gf100_ram_calc, .prog = gf100_ram_prog, .tidy = gf100_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c index 985ec64cf369..70a06e3cd55a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c @@ -48,8 +48,6 @@ gf108_ram = { .probe_fbp_amount = gf108_ram_probe_fbp_amount, .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gf100_ram_calc, .prog = gf100_ram_prog, .tidy = gf100_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c index 75814f15eb53..8bcb7e79a0cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -1704,8 +1704,6 @@ gk104_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c index 3f0b56347291..27c68e3f9772 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c @@ -39,8 +39,6 @@ gm107_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c index fd8facf90476..6b0cac1fe7b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c @@ -54,8 +54,6 @@ gm200_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index df8a87333b67..adb62a6beb63 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c 
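A compact restatement of the heap split gf100_ram_ctor() now performs above, with the address arithmetic pulled out. NVKM_RAM_MM_SHIFT is 12 in nouveau, i.e. the allocator works in 4KiB units; types are simplified:

	#include <stdint.h>

	#define SHIFT 12 /* NVKM_RAM_MM_SHIFT: allocator units of 4KiB */

	struct span { uint64_t offset, length; }; /* in allocator units */

	/* lower/total/ubase/usize and the reserved head/tail are byte
	 * amounts, as in the constructor above. When lower != total the
	 * board has a mixed-memory layout: the low range is registered as
	 * the NORMAL heap and the high range as the MIXED heap.
	 */
	static void split_heaps(uint64_t lower, uint64_t total, uint64_t ubase,
				uint64_t usize, uint64_t rsvd_head,
				uint64_t rsvd_tail,
				struct span *normal, struct span *mixed)
	{
		if (lower != total) {
			normal->offset = rsvd_head >> SHIFT;
			normal->length = (lower - rsvd_head) >> SHIFT;
			mixed->offset  = ubase >> SHIFT;
			mixed->length  = (usize - rsvd_tail) >> SHIFT;
		} else {
			normal->offset = rsvd_head >> SHIFT;
			normal->length = (total - rsvd_head - rsvd_tail) >> SHIFT;
			mixed->offset  = mixed->length = 0; /* no upper window */
		}
	}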
@@ -84,8 +84,6 @@ gp100_ram = { .probe_fbp_amount = gm200_ram_probe_fbp_amount, .probe_fbpa_amount = gp100_ram_probe_fbpa, .init = gp100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c index f10664372161..920b3d347803 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c @@ -26,6 +26,7 @@ #include "ram.h" #include "ramfuc.h" +#include <core/memory.h> #include <core/option.h> #include <subdev/bios.h> #include <subdev/bios/M0205.h> @@ -86,7 +87,7 @@ struct gt215_ltrain { u32 r_100720; u32 r_1111e0; u32 r_111400; - struct nvkm_mem *mem; + struct nvkm_memory *memory; }; struct gt215_ram { @@ -279,10 +280,10 @@ gt215_link_train_init(struct gt215_ram *ram) struct gt215_ltrain *train = &ram->ltrain; struct nvkm_device *device = ram->base.fb->subdev.device; struct nvkm_bios *bios = device->bios; - struct nvkm_mem *mem; struct nvbios_M0205E M0205E; u8 ver, hdr, cnt, len; u32 r001700; + u64 addr; int ret, i = 0; train->state = NVA3_TRAIN_UNSUPPORTED; @@ -297,14 +298,14 @@ gt215_link_train_init(struct gt215_ram *ram) train->state = NVA3_TRAIN_ONCE; - ret = ram->base.func->get(&ram->base, 0x8000, 0x10000, 0, 0x800, - &ram->ltrain.mem); + ret = nvkm_ram_get(device, NVKM_RAM_MM_NORMAL, 0x01, 16, 0x8000, + true, true, &ram->ltrain.memory); if (ret) return ret; - mem = ram->ltrain.mem; + addr = nvkm_memory_addr(ram->ltrain.memory); - nvkm_wr32(device, 0x100538, 0x10000000 | (mem->offset >> 16)); + nvkm_wr32(device, 0x100538, 0x10000000 | (addr >> 16)); nvkm_wr32(device, 0x1005a8, 0x0000ffff); nvkm_mask(device, 0x10f800, 0x00000001, 0x00000001); @@ -320,7 +321,7 @@ gt215_link_train_init(struct gt215_ram *ram) /* And upload the pattern */ r001700 = nvkm_rd32(device, 0x1700); - nvkm_wr32(device, 0x1700, mem->offset >> 16); + nvkm_wr32(device, 0x1700, addr >> 16); for (i = 0; i < 16; i++) nvkm_wr32(device, 0x700000 + (i << 2), pattern[i]); for (i = 0; i < 16; i++) @@ -336,8 +337,7 @@ gt215_link_train_init(struct gt215_ram *ram) static void gt215_link_train_fini(struct gt215_ram *ram) { - if (ram->ltrain.mem) - ram->base.func->put(&ram->base, &ram->ltrain.mem); + nvkm_memory_unref(&ram->ltrain.memory); } /* @@ -931,8 +931,6 @@ static const struct nvkm_ram_func gt215_ram_func = { .dtor = gt215_ram_dtor, .init = gt215_ram_init, - .get = nv50_ram_get, - .put = nv50_ram_put, .calc = gt215_ram_calc, .prog = gt215_ram_prog, .tidy = gt215_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c index 017a91de74a0..7de18e53ef45 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c @@ -53,8 +53,6 @@ mcp77_ram_init(struct nvkm_ram *base) static const struct nvkm_ram_func mcp77_ram_func = { .init = mcp77_ram_init, - .get = nv50_ram_get, - .put = nv50_ram_put, }; int @@ -73,7 +71,7 @@ mcp77_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) *pram = &ram->base; ret = nvkm_ram_ctor(&mcp77_ram_func, fb, NVKM_RAM_TYPE_STOLEN, - size, 0, &ram->base); + size, &ram->base); if (ret) return ret; @@ -81,7 +79,8 @@ mcp77_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) ram->base.stolen = base; nvkm_mm_fini(&ram->base.vram); - return nvkm_mm_init(&ram->base.vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + return nvkm_mm_init(&ram->base.vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (size - rsvd_head - 
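Reading the gt215 hunk above against the new nvkm_ram_get() signature (device, heap, type, page, size, contig, back, pmemory): the link-training buffer is a contiguous 0x8000-byte allocation with page shift 16 (64KiB alignment), taken from the back of the NVKM_RAM_MM_NORMAL heap. Note that nvkm_ram_get() rounds the size up to the page size, so the 32KiB request actually consumes 64KiB of VRAM:

	/* As in the diff; repeated here only to label the arguments. */
	ret = nvkm_ram_get(device, NVKM_RAM_MM_NORMAL, /* heap */
			   0x01,                       /* memory type */
			   16,                         /* page shift: 64KiB */
			   0x8000,                     /* size: 32KiB */
			   true,                       /* contiguous */
			   true,                       /* allocate from top */
			   &ram->ltrain.memory);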
rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c index 6f053a03d61c..cc764a93f1a3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c @@ -61,5 +61,5 @@ nv04_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) else type = NVKM_RAM_TYPE_SDRAM; - return nvkm_ram_new_(&nv04_ram_func, fb, type, size, 0, pram); + return nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c index dfd155c98dbb..afe54e323b18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c @@ -36,5 +36,5 @@ nv10_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) else type = NVKM_RAM_TYPE_SDRAM; - return nvkm_ram_new_(&nv04_ram_func, fb, type, size, 0, pram); + return nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c index 3c6a8710e812..4c07d10bb976 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c @@ -44,5 +44,5 @@ nv1a_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) } return nvkm_ram_new_(&nv04_ram_func, fb, NVKM_RAM_TYPE_STOLEN, - mib * 1024 * 1024, 0, pram); + mib * 1024 * 1024, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c index 747e47c10cc7..71d63d7daa75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c @@ -29,7 +29,6 @@ nv20_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 pbus1218 = nvkm_rd32(device, 0x001218); u32 size = (nvkm_rd32(device, 0x10020c) & 0xff000000); - u32 tags = nvkm_rd32(device, 0x100320); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv20_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000300: type = NVKM_RAM_TYPE_GDDR2; break; } - ret = nvkm_ram_new_(&nv04_ram_func, fb, type, size, tags, pram); + ret = nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c index 70c63535d56b..2b12e388f47a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c @@ -187,13 +187,13 @@ nv40_ram_func = { int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type type, u64 size, - u32 tags, struct nvkm_ram **pram) + struct nvkm_ram **pram) { struct nv40_ram *ram; if (!(ram = kzalloc(sizeof(*ram), GFP_KERNEL))) return -ENOMEM; *pram = &ram->base; - return nvkm_ram_ctor(&nv40_ram_func, fb, type, size, tags, &ram->base); + return nvkm_ram_ctor(&nv40_ram_func, fb, type, size, &ram->base); } int @@ -202,7 +202,6 @@ nv40_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 pbus1218 = nvkm_rd32(device, 0x001218); u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -213,7 +212,7 @@ nv40_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000300: type = NVKM_RAM_TYPE_DDR2 ; break; } - ret = nv40_ram_new_(fb, type, size, 
tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h index 8a0524566b48..ec5dcbfcaea8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h @@ -9,6 +9,6 @@ struct nv40_ram { u32 coef; }; -int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type, u64, u32, +int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type, u64, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c index 114828be292e..d3fea3726461 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c @@ -28,7 +28,6 @@ nv41_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); u32 fb474 = nvkm_rd32(device, 0x100474); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv41_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) if (fb474 & 0x00000001) type = NVKM_RAM_TYPE_DDR1; - ret = nv40_ram_new_(fb, type, size, tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c index bc56fbf1c788..ab2630e5e6fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c @@ -38,5 +38,5 @@ nv44_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) if (fb474 & 0x00000001) type = NVKM_RAM_TYPE_DDR1; - return nv40_ram_new_(fb, type, size, 0, pram); + return nv40_ram_new_(fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c index c01f4b1022b8..946ca7c2e0b6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c @@ -28,7 +28,6 @@ nv49_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); u32 fb914 = nvkm_rd32(device, 0x100914); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv49_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000003: break; } - ret = nv40_ram_new_(fb, type, size, tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c index fa3c2e06203d..02b8bdbc819f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c @@ -29,5 +29,5 @@ nv4e_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; return nvkm_ram_new_(&nv04_ram_func, fb, NVKM_RAM_TYPE_UNKNOWN, - size, 0, pram); + size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c index 6549b0588309..2ccb4b6be153 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c @@ -493,100 +493,8 @@ nv50_ram_tidy(struct nvkm_ram *base) ram_exec(&ram->hwsq, false); } 
-void -__nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem) -{ - struct nvkm_mm_node *next = mem->mem; - struct nvkm_mm_node *node; - while ((node = next)) { - next = node->next; - nvkm_mm_free(&ram->vram, &node); - } - nvkm_mm_free(&ram->tags, &mem->tag); -} - -void -nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem **pmem) -{ - struct nvkm_mem *mem = *pmem; - - *pmem = NULL; - if (unlikely(mem == NULL)) - return; - - mutex_lock(&ram->fb->subdev.mutex); - __nv50_ram_put(ram, mem); - mutex_unlock(&ram->fb->subdev.mutex); - - kfree(mem); -} - -int -nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, - u32 memtype, struct nvkm_mem **pmem) -{ - struct nvkm_mm *heap = &ram->vram; - struct nvkm_mm *tags = &ram->tags; - struct nvkm_mm_node **node, *r; - struct nvkm_mem *mem; - int comp = (memtype & 0x300) >> 8; - int type = (memtype & 0x07f); - int back = (memtype & 0x800); - int min, max, ret; - - max = (size >> NVKM_RAM_MM_SHIFT); - min = ncmin ? (ncmin >> NVKM_RAM_MM_SHIFT) : max; - align >>= NVKM_RAM_MM_SHIFT; - - mem = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem) - return -ENOMEM; - - mutex_lock(&ram->fb->subdev.mutex); - if (comp) { - if (align == (1 << (16 - NVKM_RAM_MM_SHIFT))) { - int n = (max >> 4) * comp; - - ret = nvkm_mm_head(tags, 0, 1, n, n, 1, &mem->tag); - if (ret) - mem->tag = NULL; - } - - if (unlikely(!mem->tag)) - comp = 0; - } - - mem->memtype = (comp << 7) | type; - mem->size = max; - - type = nv50_fb_memtype[type]; - node = &mem->mem; - do { - if (back) - ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r); - else - ret = nvkm_mm_head(heap, 0, type, max, min, align, &r); - if (ret) { - mutex_unlock(&ram->fb->subdev.mutex); - ram->func->put(ram, &mem); - return ret; - } - - *node = r; - node = &r->next; - max -= r->length; - } while (max); - mutex_unlock(&ram->fb->subdev.mutex); - - mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; - *pmem = mem; - return 0; -} - static const struct nvkm_ram_func nv50_ram_func = { - .get = nv50_ram_get, - .put = nv50_ram_put, .calc = nv50_ram_calc, .prog = nv50_ram_prog, .tidy = nv50_ram_tidy, @@ -639,7 +547,6 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, const u32 rsvd_head = ( 256 * 1024); /* vga memory */ const u32 rsvd_tail = (1024 * 1024); /* vbios etc */ u64 size = nvkm_rd32(device, 0x10020c); - u32 tags = nvkm_rd32(device, 0x100320); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -660,7 +567,7 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, size = (size & 0x000000ff) << 32 | (size & 0xffffff00); - ret = nvkm_ram_ctor(func, fb, type, size, tags, ram); + ret = nvkm_ram_ctor(func, fb, type, size, ram); if (ret) return ret; @@ -669,7 +576,8 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, ram->ranks = (nvkm_rd32(device, 0x100200) & 0x4) ? 
2 : 1; nvkm_mm_fini(&ram->vram); - return nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + return nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (size - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, nv50_fb_vram_rblock(ram) >> NVKM_RAM_MM_SHIFT); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c index 10c987a654ec..364ea4492acc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c @@ -23,181 +23,90 @@ */ #include "priv.h" -#include <core/memory.h> #include <subdev/bar.h> /****************************************************************************** * instmem object base implementation *****************************************************************************/ -#define nvkm_instobj(p) container_of((p), struct nvkm_instobj, memory) - -struct nvkm_instobj { - struct nvkm_memory memory; - struct nvkm_memory *parent; - struct nvkm_instmem *imem; - struct list_head head; - u32 *suspend; - void __iomem *map; -}; - -static enum nvkm_memory_target -nvkm_instobj_target(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_target(memory); -} - -static u64 -nvkm_instobj_addr(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_addr(memory); -} - -static u64 -nvkm_instobj_size(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_size(memory); -} - static void -nvkm_instobj_release(struct nvkm_memory *memory) +nvkm_instobj_load(struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - nvkm_bar_flush(iobj->imem->subdev.device->bar); -} - -static void __iomem * -nvkm_instobj_acquire(struct nvkm_memory *memory) -{ - return nvkm_instobj(memory)->map; -} - -static u32 -nvkm_instobj_rd32(struct nvkm_memory *memory, u64 offset) -{ - return ioread32_native(nvkm_instobj(memory)->map + offset); -} - -static void -nvkm_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) -{ - iowrite32_native(data, nvkm_instobj(memory)->map + offset); -} + struct nvkm_memory *memory = &iobj->memory; + const u64 size = nvkm_memory_size(memory); + void __iomem *map; + int i; -static void -nvkm_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) -{ - memory = nvkm_instobj(memory)->parent; - nvkm_memory_map(memory, vma, offset); -} + if (!(map = nvkm_kmap(memory))) { + for (i = 0; i < size; i += 4) + nvkm_wo32(memory, i, iobj->suspend[i / 4]); + } else { + memcpy_toio(map, iobj->suspend, size); + } + nvkm_done(memory); -static void * -nvkm_instobj_dtor(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - spin_lock(&iobj->imem->lock); - list_del(&iobj->head); - spin_unlock(&iobj->imem->lock); - nvkm_memory_del(&iobj->parent); - return iobj; + kvfree(iobj->suspend); + iobj->suspend = NULL; } -static const struct nvkm_memory_func -nvkm_instobj_func = { - .dtor = nvkm_instobj_dtor, - .target = nvkm_instobj_target, - .addr = nvkm_instobj_addr, - .size = nvkm_instobj_size, - .acquire = nvkm_instobj_acquire, - .release = nvkm_instobj_release, - .rd32 = nvkm_instobj_rd32, - .wr32 = nvkm_instobj_wr32, - .map = nvkm_instobj_map, -}; - -static void -nvkm_instobj_boot(struct nvkm_memory *memory, struct nvkm_vm *vm) +static int +nvkm_instobj_save(struct nvkm_instobj *iobj) { - memory = nvkm_instobj(memory)->parent; - nvkm_memory_boot(memory, vm); -} + struct 
nvkm_memory *memory = &iobj->memory; + const u64 size = nvkm_memory_size(memory); + void __iomem *map; + int i; -static void -nvkm_instobj_release_slow(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - nvkm_instobj_release(memory); - nvkm_done(iobj->parent); -} + iobj->suspend = kvmalloc(size, GFP_KERNEL); + if (!iobj->suspend) + return -ENOMEM; -static void __iomem * -nvkm_instobj_acquire_slow(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - iobj->map = nvkm_kmap(iobj->parent); - if (iobj->map) - memory->func = &nvkm_instobj_func; - return iobj->map; + if (!(map = nvkm_kmap(memory))) { + for (i = 0; i < size; i += 4) + iobj->suspend[i / 4] = nvkm_ro32(memory, i); + } else { + memcpy_fromio(iobj->suspend, map, size); + } + nvkm_done(memory); + return 0; } -static u32 -nvkm_instobj_rd32_slow(struct nvkm_memory *memory, u64 offset) +void +nvkm_instobj_dtor(struct nvkm_instmem *imem, struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - return nvkm_ro32(iobj->parent, offset); + spin_lock(&imem->lock); + list_del(&iobj->head); + spin_unlock(&imem->lock); } -static void -nvkm_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data) +void +nvkm_instobj_ctor(const struct nvkm_memory_func *func, + struct nvkm_instmem *imem, struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - return nvkm_wo32(iobj->parent, offset, data); + nvkm_memory_ctor(func, &iobj->memory); + iobj->suspend = NULL; + spin_lock(&imem->lock); + list_add_tail(&iobj->head, &imem->list); + spin_unlock(&imem->lock); } -static const struct nvkm_memory_func -nvkm_instobj_func_slow = { - .dtor = nvkm_instobj_dtor, - .target = nvkm_instobj_target, - .addr = nvkm_instobj_addr, - .size = nvkm_instobj_size, - .boot = nvkm_instobj_boot, - .acquire = nvkm_instobj_acquire_slow, - .release = nvkm_instobj_release_slow, - .rd32 = nvkm_instobj_rd32_slow, - .wr32 = nvkm_instobj_wr32_slow, - .map = nvkm_instobj_map, -}; - int nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero, struct nvkm_memory **pmemory) { + struct nvkm_subdev *subdev = &imem->subdev; struct nvkm_memory *memory = NULL; - struct nvkm_instobj *iobj; u32 offset; int ret; ret = imem->func->memory_new(imem, size, align, zero, &memory); - if (ret) + if (ret) { + nvkm_error(subdev, "OOM: %08x %08x %d\n", size, align, ret); goto done; - - if (!imem->func->persistent) { - if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) { - ret = -ENOMEM; - goto done; - } - - nvkm_memory_ctor(&nvkm_instobj_func_slow, &iobj->memory); - iobj->parent = memory; - iobj->imem = imem; - spin_lock(&iobj->imem->lock); - list_add_tail(&iobj->head, &imem->list); - spin_unlock(&iobj->imem->lock); - memory = &iobj->memory; } + nvkm_trace(subdev, "new %08x %08x %d: %010llx %010llx\n", size, align, + zero, nvkm_memory_addr(memory), nvkm_memory_size(memory)); + if (!imem->func->zero && zero) { void __iomem *map = nvkm_kmap(memory); if (unlikely(!map)) { @@ -211,7 +120,7 @@ nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero, done: if (ret) - nvkm_memory_del(&memory); + nvkm_memory_unref(&memory); *pmemory = memory; return ret; } @@ -232,39 +141,46 @@ nvkm_instmem_wr32(struct nvkm_instmem *imem, u32 addr, u32 data) return imem->func->wr32(imem, addr, data); } +void +nvkm_instmem_boot(struct nvkm_instmem *imem) +{ + /* Separate bootstrapped objects from normal list, as we need + * to make sure they're accessed with the slowpath on suspend 
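A userspace sketch of the save path in nvkm_instobj_save() above: back the object up into an allocated buffer, preferring a bulk copy through a direct mapping and falling back to 32-bit accessor reads when no mapping is available (the load path mirrors this with writes). The mapping and ro32 callback are stand-ins for nvkm_kmap()/nvkm_ro32():

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	struct obj {
		uint64_t size;                            /* bytes, multiple of 4 */
		void *map;                                /* direct mapping, or NULL */
		uint32_t (*ro32)(struct obj *, uint64_t); /* slow word accessor */
		uint32_t *suspend;                        /* backup buffer */
	};

	static int obj_save(struct obj *o)
	{
		uint64_t i;

		o->suspend = malloc(o->size);             /* kvmalloc() in the diff */
		if (!o->suspend)
			return -1;

		if (o->map)                               /* fast path: bulk copy */
			memcpy(o->suspend, o->map, o->size);
		else                                      /* slow path: word reads */
			for (i = 0; i < o->size; i += 4)
				o->suspend[i / 4] = o->ro32(o, i);
		return 0;
	}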
+ * and resume. + */ + struct nvkm_instobj *iobj, *itmp; + spin_lock(&imem->lock); + list_for_each_entry_safe(iobj, itmp, &imem->list, head) { + list_move_tail(&iobj->head, &imem->boot); + } + spin_unlock(&imem->lock); +} + static int nvkm_instmem_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_instmem *imem = nvkm_instmem(subdev); struct nvkm_instobj *iobj; - int i; - - if (imem->func->fini) - imem->func->fini(imem); if (suspend) { list_for_each_entry(iobj, &imem->list, head) { - struct nvkm_memory *memory = iobj->parent; - u64 size = nvkm_memory_size(memory); + int ret = nvkm_instobj_save(iobj); + if (ret) + return ret; + } - iobj->suspend = vmalloc(size); - if (!iobj->suspend) - return -ENOMEM; + nvkm_bar_bar2_fini(subdev->device); - for (i = 0; i < size; i += 4) - iobj->suspend[i / 4] = nvkm_ro32(memory, i); + list_for_each_entry(iobj, &imem->boot, head) { + int ret = nvkm_instobj_save(iobj); + if (ret) + return ret; } } - return 0; -} + if (imem->func->fini) + imem->func->fini(imem); -static int -nvkm_instmem_oneinit(struct nvkm_subdev *subdev) -{ - struct nvkm_instmem *imem = nvkm_instmem(subdev); - if (imem->func->oneinit) - return imem->func->oneinit(imem); return 0; } @@ -273,22 +189,31 @@ nvkm_instmem_init(struct nvkm_subdev *subdev) { struct nvkm_instmem *imem = nvkm_instmem(subdev); struct nvkm_instobj *iobj; - int i; + + list_for_each_entry(iobj, &imem->boot, head) { + if (iobj->suspend) + nvkm_instobj_load(iobj); + } + + nvkm_bar_bar2_init(subdev->device); list_for_each_entry(iobj, &imem->list, head) { - if (iobj->suspend) { - struct nvkm_memory *memory = iobj->parent; - u64 size = nvkm_memory_size(memory); - for (i = 0; i < size; i += 4) - nvkm_wo32(memory, i, iobj->suspend[i / 4]); - vfree(iobj->suspend); - iobj->suspend = NULL; - } + if (iobj->suspend) + nvkm_instobj_load(iobj); } return 0; } +static int +nvkm_instmem_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_instmem *imem = nvkm_instmem(subdev); + if (imem->func->oneinit) + return imem->func->oneinit(imem); + return 0; +} + static void * nvkm_instmem_dtor(struct nvkm_subdev *subdev) { @@ -315,4 +240,5 @@ nvkm_instmem_ctor(const struct nvkm_instmem_func *func, imem->func = func; spin_lock_init(&imem->lock); INIT_LIST_HEAD(&imem->list); + INIT_LIST_HEAD(&imem->boot); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index cd5adbec5e57..985f2990ab0d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -44,14 +44,13 @@ #include "priv.h" #include <core/memory.h> -#include <core/mm.h> #include <core/tegra.h> -#include <subdev/fb.h> #include <subdev/ltc.h> +#include <subdev/mmu.h> struct gk20a_instobj { struct nvkm_memory memory; - struct nvkm_mem mem; + struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ @@ -119,16 +118,22 @@ gk20a_instobj_target(struct nvkm_memory *memory) return NVKM_MEM_TARGET_NCOH; } +static u8 +gk20a_instobj_page(struct nvkm_memory *memory) +{ + return 12; +} + static u64 gk20a_instobj_addr(struct nvkm_memory *memory) { - return gk20a_instobj(memory)->mem.offset; + return (u64)gk20a_instobj(memory)->mn->offset << 12; } static u64 gk20a_instobj_size(struct nvkm_memory *memory) { - return (u64)gk20a_instobj(memory)->mem.size << 12; + return (u64)gk20a_instobj(memory)->mn->length << 12; } /* @@ -272,12 +277,18 @@ gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) node->vaddr[offset / 4] = data; } -static 
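Both nvkm_vram_map() earlier and gk20a_instobj_map() below follow the same shape: the memory object packs itself, the caller's offset, and its allocator nodes into a struct nvkm_vmm_map descriptor and defers all page-table work to the VMM. A sketch of that shape with stand-in types; vmm_map() is an assumed analogue of nvkm_vmm_map():

	#include <stdint.h>

	struct mm_node;              /* allocator node chain, as in the diff */
	struct vmm;
	struct vma;
	struct vmm_map {
		void *memory;        /* backing object, for lifetime tracking */
		uint64_t offset;     /* byte offset into the backing memory */
		struct mm_node *mem; /* VRAM nodes to map */
	};

	extern int vmm_map(struct vmm *, struct vma *, void *argv, uint32_t argc,
			   struct vmm_map *); /* assumed nvkm_vmm_map() analogue */

	static int memory_map(void *memory, struct mm_node *nodes, uint64_t offset,
			      struct vmm *vmm, struct vma *vma,
			      void *argv, uint32_t argc)
	{
		struct vmm_map map = {
			.memory = memory,
			.offset = offset,
			.mem    = nodes,
		};
		return vmm_map(vmm, vma, argv, argc, &map);
	}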
void -gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) +static int +gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) { struct gk20a_instobj *node = gk20a_instobj(memory); + struct nvkm_vmm_map map = { + .memory = &node->memory, + .offset = offset, + .mem = node->mn, + }; - nvkm_vm_map_at(vma, offset, &node->mem); + return nvkm_vmm_map(vmm, vma, argv, argc, &map); } static void * @@ -290,8 +301,8 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) if (unlikely(!node->base.vaddr)) goto out; - dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, - node->handle, imem->attrs); + dma_free_attrs(dev, (u64)node->base.mn->length << PAGE_SHIFT, + node->base.vaddr, node->handle, imem->attrs); out: return node; @@ -303,7 +314,7 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; - struct nvkm_mm_node *r = node->base.mem.mem; + struct nvkm_mm_node *r = node->base.mn; int i; if (unlikely(!r)) @@ -321,7 +332,7 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); /* Unmap pages from GPU address space and free them */ - for (i = 0; i < node->base.mem.size; i++) { + for (i = 0; i < node->base.mn->length; i++) { iommu_unmap(imem->domain, (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE); dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE, @@ -342,12 +353,11 @@ static const struct nvkm_memory_func gk20a_instobj_func_dma = { .dtor = gk20a_instobj_dtor_dma, .target = gk20a_instobj_target, + .page = gk20a_instobj_page, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, .acquire = gk20a_instobj_acquire_dma, .release = gk20a_instobj_release_dma, - .rd32 = gk20a_instobj_rd32, - .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, }; @@ -355,13 +365,18 @@ static const struct nvkm_memory_func gk20a_instobj_func_iommu = { .dtor = gk20a_instobj_dtor_iommu, .target = gk20a_instobj_target, + .page = gk20a_instobj_page, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, .acquire = gk20a_instobj_acquire_iommu, .release = gk20a_instobj_release_iommu, + .map = gk20a_instobj_map, +}; + +static const struct nvkm_memory_ptrs +gk20a_instobj_ptrs = { .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, - .map = gk20a_instobj_map, }; static int @@ -377,6 +392,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); + node->base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -397,8 +413,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, node->r.offset = node->handle >> 12; node->r.length = (npages << PAGE_SHIFT) >> 12; - node->base.mem.offset = node->handle; - node->base.mem.mem = &node->r; + node->base.mn = &node->r; return 0; } @@ -424,6 +439,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, node->dma_addrs = (void *)(node->pages + npages); nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); + node->base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -474,8 +490,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, /* IOMMU bit tells 
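The gk20a hunks above show the other half of this rework: rd32/wr32 move out of nvkm_memory_func into a small nvkm_memory_ptrs table attached per object, so accessors can be attached (or later swapped) independently of the object's main function table. A self-contained sketch of the pattern:

	#include <stdint.h>
	#include <stdio.h>

	struct mem;
	struct mem_ptrs {            /* like nvkm_memory_ptrs */
		uint32_t (*rd32)(struct mem *, uint64_t);
		void (*wr32)(struct mem *, uint64_t, uint32_t);
	};
	struct mem {
		const struct mem_ptrs *ptrs;
		uint32_t backing[64]; /* stand-in for the real storage */
	};

	static uint32_t direct_rd32(struct mem *m, uint64_t off)
	{
		return m->backing[off / 4];
	}

	static void direct_wr32(struct mem *m, uint64_t off, uint32_t data)
	{
		m->backing[off / 4] = data;
	}

	static const struct mem_ptrs direct_ptrs = { direct_rd32, direct_wr32 };

	int main(void)
	{
		struct mem m = { .ptrs = &direct_ptrs };
		m.ptrs->wr32(&m, 8, 0xcafe);           /* via the accessor table */
		printf("%08x\n", m.ptrs->rd32(&m, 8));
		return 0;
	}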
that an address is to be resolved through the IOMMU */ r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); - node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift; - node->base.mem.mem = r; + node->base.mn = r; return 0; release_area: @@ -523,13 +538,8 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, node->imem = imem; - /* present memory for being mapped using small pages */ - node->mem.size = size >> 12; - node->mem.memtype = 0; - node->mem.page_shift = 12; - nvkm_debug(subdev, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n", - size, align, node->mem.offset); + size, align, (u64)node->mn->offset << 12); return 0; } @@ -554,7 +564,6 @@ static const struct nvkm_instmem_func gk20a_instmem = { .dtor = gk20a_instmem_dtor, .memory_new = gk20a_instobj_new, - .persistent = true, .zero = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c index 6133c8bb2d42..6bf0dad46919 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c @@ -24,7 +24,6 @@ #define nv04_instmem(p) container_of((p), struct nv04_instmem, base) #include "priv.h" -#include <core/memory.h> #include <core/ramht.h> struct nv04_instmem { @@ -35,30 +34,39 @@ struct nv04_instmem { /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv04_instobj(p) container_of((p), struct nv04_instobj, memory) +#define nv04_instobj(p) container_of((p), struct nv04_instobj, base.memory) struct nv04_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv04_instmem *imem; struct nvkm_mm_node *node; }; -static enum nvkm_memory_target -nv04_instobj_target(struct nvkm_memory *memory) +static void +nv04_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_INST; + struct nv04_instobj *iobj = nv04_instobj(memory); + struct nvkm_device *device = iobj->imem->base.subdev.device; + nvkm_wr32(device, 0x700000 + iobj->node->offset + offset, data); } -static u64 -nv04_instobj_addr(struct nvkm_memory *memory) +static u32 +nv04_instobj_rd32(struct nvkm_memory *memory, u64 offset) { - return nv04_instobj(memory)->node->offset; + struct nv04_instobj *iobj = nv04_instobj(memory); + struct nvkm_device *device = iobj->imem->base.subdev.device; + return nvkm_rd32(device, 0x700000 + iobj->node->offset + offset); } -static u64 -nv04_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv04_instobj_ptrs = { + .rd32 = nv04_instobj_rd32, + .wr32 = nv04_instobj_wr32, +}; + +static void +nv04_instobj_release(struct nvkm_memory *memory) { - return nv04_instobj(memory)->node->length; } static void __iomem * @@ -69,25 +77,22 @@ nv04_instobj_acquire(struct nvkm_memory *memory) return device->pri + 0x700000 + iobj->node->offset; } -static void -nv04_instobj_release(struct nvkm_memory *memory) +static u64 +nv04_instobj_size(struct nvkm_memory *memory) { + return nv04_instobj(memory)->node->length; } -static u32 -nv04_instobj_rd32(struct nvkm_memory *memory, u64 offset) +static u64 +nv04_instobj_addr(struct nvkm_memory *memory) { - struct nv04_instobj *iobj = nv04_instobj(memory); - struct nvkm_device *device = iobj->imem->base.subdev.device; - return nvkm_rd32(device, 0x700000 + iobj->node->offset + offset); + return nv04_instobj(memory)->node->offset; } -static void 
-nv04_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +static enum nvkm_memory_target +nv04_instobj_target(struct nvkm_memory *memory) { - struct nv04_instobj *iobj = nv04_instobj(memory); - struct nvkm_device *device = iobj->imem->base.subdev.device; - nvkm_wr32(device, 0x700000 + iobj->node->offset + offset, data); + return NVKM_MEM_TARGET_INST; } static void * @@ -97,6 +102,7 @@ nv04_instobj_dtor(struct nvkm_memory *memory) mutex_lock(&iobj->imem->base.subdev.mutex); nvkm_mm_free(&iobj->imem->heap, &iobj->node); mutex_unlock(&iobj->imem->base.subdev.mutex); + nvkm_instobj_dtor(&iobj->imem->base, &iobj->base); return iobj; } @@ -108,8 +114,6 @@ nv04_instobj_func = { .addr = nv04_instobj_addr, .acquire = nv04_instobj_acquire, .release = nv04_instobj_release, - .rd32 = nv04_instobj_rd32, - .wr32 = nv04_instobj_wr32, }; static int @@ -122,9 +126,10 @@ nv04_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv04_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv04_instobj_func, &imem->base, &iobj->base); + iobj->base.memory.ptrs = &nv04_instobj_ptrs; iobj->imem = imem; mutex_lock(&imem->base.subdev.mutex); @@ -160,7 +165,7 @@ nv04_instmem_oneinit(struct nvkm_instmem *base) /* PRAMIN aperture maps over the end of VRAM, reserve it */ imem->base.reserved = 512 * 1024; - ret = nvkm_mm_init(&imem->heap, 0, imem->base.reserved, 1); + ret = nvkm_mm_init(&imem->heap, 0, 0, imem->base.reserved, 1); if (ret) return ret; @@ -194,10 +199,10 @@ static void * nv04_instmem_dtor(struct nvkm_instmem *base) { struct nv04_instmem *imem = nv04_instmem(base); - nvkm_memory_del(&imem->base.ramfc); - nvkm_memory_del(&imem->base.ramro); + nvkm_memory_unref(&imem->base.ramfc); + nvkm_memory_unref(&imem->base.ramro); nvkm_ramht_del(&imem->base.ramht); - nvkm_memory_del(&imem->base.vbios); + nvkm_memory_unref(&imem->base.vbios); nvkm_mm_fini(&imem->heap); return imem; } @@ -209,7 +214,6 @@ nv04_instmem = { .rd32 = nv04_instmem_rd32, .wr32 = nv04_instmem_wr32, .memory_new = nv04_instobj_new, - .persistent = false, .zero = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c index c0543875e490..086c118488ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c @@ -24,7 +24,6 @@ #define nv40_instmem(p) container_of((p), struct nv40_instmem, base) #include "priv.h" -#include <core/memory.h> #include <core/ramht.h> #include <engine/gr/nv40.h> @@ -37,30 +36,38 @@ struct nv40_instmem { /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv40_instobj(p) container_of((p), struct nv40_instobj, memory) +#define nv40_instobj(p) container_of((p), struct nv40_instobj, base.memory) struct nv40_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv40_instmem *imem; struct nvkm_mm_node *node; }; -static enum nvkm_memory_target -nv40_instobj_target(struct nvkm_memory *memory) +static void +nv40_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_INST; + struct nv40_instobj *iobj = nv40_instobj(memory); + iowrite32_native(data, iobj->imem->iomem + iobj->node->offset + offset); } -static u64 
-nv40_instobj_addr(struct nvkm_memory *memory) +static u32 +nv40_instobj_rd32(struct nvkm_memory *memory, u64 offset) { - return nv40_instobj(memory)->node->offset; + struct nv40_instobj *iobj = nv40_instobj(memory); + return ioread32_native(iobj->imem->iomem + iobj->node->offset + offset); } -static u64 -nv40_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv40_instobj_ptrs = { + .rd32 = nv40_instobj_rd32, + .wr32 = nv40_instobj_wr32, +}; + +static void +nv40_instobj_release(struct nvkm_memory *memory) { - return nv40_instobj(memory)->node->length; + wmb(); } static void __iomem * @@ -70,23 +77,22 @@ nv40_instobj_acquire(struct nvkm_memory *memory) return iobj->imem->iomem + iobj->node->offset; } -static void -nv40_instobj_release(struct nvkm_memory *memory) +static u64 +nv40_instobj_size(struct nvkm_memory *memory) { + return nv40_instobj(memory)->node->length; } -static u32 -nv40_instobj_rd32(struct nvkm_memory *memory, u64 offset) +static u64 +nv40_instobj_addr(struct nvkm_memory *memory) { - struct nv40_instobj *iobj = nv40_instobj(memory); - return ioread32_native(iobj->imem->iomem + iobj->node->offset + offset); + return nv40_instobj(memory)->node->offset; } -static void -nv40_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +static enum nvkm_memory_target +nv40_instobj_target(struct nvkm_memory *memory) { - struct nv40_instobj *iobj = nv40_instobj(memory); - iowrite32_native(data, iobj->imem->iomem + iobj->node->offset + offset); + return NVKM_MEM_TARGET_INST; } static void * @@ -96,6 +102,7 @@ nv40_instobj_dtor(struct nvkm_memory *memory) mutex_lock(&iobj->imem->base.subdev.mutex); nvkm_mm_free(&iobj->imem->heap, &iobj->node); mutex_unlock(&iobj->imem->base.subdev.mutex); + nvkm_instobj_dtor(&iobj->imem->base, &iobj->base); return iobj; } @@ -107,8 +114,6 @@ nv40_instobj_func = { .addr = nv40_instobj_addr, .acquire = nv40_instobj_acquire, .release = nv40_instobj_release, - .rd32 = nv40_instobj_rd32, - .wr32 = nv40_instobj_wr32, }; static int @@ -121,9 +126,10 @@ nv40_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv40_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv40_instobj_func, &imem->base, &iobj->base); + iobj->base.memory.ptrs = &nv40_instobj_ptrs; iobj->imem = imem; mutex_lock(&imem->base.subdev.mutex); @@ -171,7 +177,7 @@ nv40_instmem_oneinit(struct nvkm_instmem *base) imem->base.reserved += 512 * 1024; /* object storage */ imem->base.reserved = round_up(imem->base.reserved, 4096); - ret = nvkm_mm_init(&imem->heap, 0, imem->base.reserved, 1); + ret = nvkm_mm_init(&imem->heap, 0, 0, imem->base.reserved, 1); if (ret) return ret; @@ -209,10 +215,10 @@ static void * nv40_instmem_dtor(struct nvkm_instmem *base) { struct nv40_instmem *imem = nv40_instmem(base); - nvkm_memory_del(&imem->base.ramfc); - nvkm_memory_del(&imem->base.ramro); + nvkm_memory_unref(&imem->base.ramfc); + nvkm_memory_unref(&imem->base.ramro); nvkm_ramht_del(&imem->base.ramht); - nvkm_memory_del(&imem->base.vbios); + nvkm_memory_unref(&imem->base.vbios); nvkm_mm_fini(&imem->heap); if (imem->iomem) iounmap(imem->iomem); @@ -226,7 +232,6 @@ nv40_instmem = { .rd32 = nv40_instmem_rd32, .wr32 = nv40_instmem_wr32, .memory_new = nv40_instobj_new, - .persistent = false, .zero = false, }; @@ -248,8 +253,8 @@ nv40_instmem_new(struct nvkm_device *device, int index, else bar = 3; - imem->iomem = 
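Two related nv40 changes meet here: ->release() gains a wmb(), and (just below) the PRAMIN BAR mapping switches from ioremap() to ioremap_wc(). Writes through a write-combined mapping may be posted and reordered, so a write barrier is needed before anything consumes the object's contents. A minimal sketch of the pairing, assuming a kernel build context:

	/* assuming kernel headers providing wmb() */
	static void instobj_release(void)
	{
		wmb(); /* order WC PRAMIN writes before the GPU sees them */
	}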
ioremap(device->func->resource_addr(device, bar), - device->func->resource_size(device, bar)); + imem->iomem = ioremap_wc(device->func->resource_addr(device, bar), + device->func->resource_size(device, bar)); if (!imem->iomem) { nvkm_error(&imem->base.subdev, "unable to map PRAMIN BAR\n"); return -EFAULT; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 6d512c062ae3..1ba7289684aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -31,147 +31,293 @@ struct nv50_instmem { struct nvkm_instmem base; - unsigned long lock_flags; - spinlock_t lock; u64 addr; + + /* Mappings that can be evicted when BAR2 space has been exhausted. */ + struct list_head lru; }; /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv50_instobj(p) container_of((p), struct nv50_instobj, memory) +#define nv50_instobj(p) container_of((p), struct nv50_instobj, base.memory) struct nv50_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv50_instmem *imem; - struct nvkm_mem *mem; - struct nvkm_vma bar; + struct nvkm_memory *ram; + struct nvkm_vma *bar; + refcount_t maps; void *map; + struct list_head lru; }; -static enum nvkm_memory_target -nv50_instobj_target(struct nvkm_memory *memory) +static void +nv50_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_VRAM; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + nvkm_wr32(device, 0x700000 + addr, data); + spin_unlock_irqrestore(&imem->base.lock, flags); } -static u64 -nv50_instobj_addr(struct nvkm_memory *memory) +static u32 +nv50_instobj_rd32_slow(struct nvkm_memory *memory, u64 offset) { - return nv50_instobj(memory)->mem->offset; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + u32 data; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + data = nvkm_rd32(device, 0x700000 + addr); + spin_unlock_irqrestore(&imem->base.lock, flags); + return data; } -static u64 -nv50_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv50_instobj_slow = { + .rd32 = nv50_instobj_rd32_slow, + .wr32 = nv50_instobj_wr32_slow, +}; + +static void +nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return (u64)nv50_instobj(memory)->mem->size << NVKM_RAM_MM_SHIFT; + iowrite32_native(data, nv50_instobj(memory)->map + offset); } +static u32 +nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) +{ + return ioread32_native(nv50_instobj(memory)->map + offset); +} + +static const struct nvkm_memory_ptrs 
+nv50_instobj_fast = { + .rd32 = nv50_instobj_rd32, + .wr32 = nv50_instobj_wr32, +}; + static void -nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vm *vm) +nv50_instobj_kmap(struct nv50_instobj *iobj, struct nvkm_vmm *vmm) { - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_subdev *subdev = &iobj->imem->base.subdev; + struct nv50_instmem *imem = iobj->imem; + struct nv50_instobj *eobj; + struct nvkm_memory *memory = &iobj->base.memory; + struct nvkm_subdev *subdev = &imem->base.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vma *bar = NULL, *ebar; u64 size = nvkm_memory_size(memory); - void __iomem *map; + void *emap; int ret; - iobj->map = ERR_PTR(-ENOMEM); - - ret = nvkm_vm_get(vm, size, 12, NV_MEM_ACCESS_RW, &iobj->bar); - if (ret == 0) { - map = ioremap(device->func->resource_addr(device, 3) + - (u32)iobj->bar.offset, size); - if (map) { - nvkm_memory_map(memory, &iobj->bar, 0); - iobj->map = map; - } else { - nvkm_warn(subdev, "PRAMIN ioremap failed\n"); - nvkm_vm_put(&iobj->bar); + /* Attempt to allocate BAR2 address-space and map the object + * into it. The lock has to be dropped while doing this due + * to the possibility of recursion for page table allocation. + */ + mutex_unlock(&subdev->mutex); + while ((ret = nvkm_vmm_get(vmm, 12, size, &bar))) { + /* Evict unused mappings, and keep retrying until we either + * succeed, or there are no more objects left on the LRU. + */ + mutex_lock(&subdev->mutex); + eobj = list_first_entry_or_null(&imem->lru, typeof(*eobj), lru); + if (eobj) { + nvkm_debug(subdev, "evict %016llx %016llx @ %016llx\n", + nvkm_memory_addr(&eobj->base.memory), + nvkm_memory_size(&eobj->base.memory), + eobj->bar->addr); + list_del_init(&eobj->lru); + ebar = eobj->bar; + eobj->bar = NULL; + emap = eobj->map; + eobj->map = NULL; } - } else { - nvkm_warn(subdev, "PRAMIN exhausted\n"); + mutex_unlock(&subdev->mutex); + if (!eobj) + break; + iounmap(emap); + nvkm_vmm_put(vmm, &ebar); } + + if (ret == 0) + ret = nvkm_memory_map(memory, 0, vmm, bar, NULL, 0); + mutex_lock(&subdev->mutex); + if (ret || iobj->bar) { + /* We either failed, or another thread beat us. */ + mutex_unlock(&subdev->mutex); + nvkm_vmm_put(vmm, &bar); + mutex_lock(&subdev->mutex); + return; + } + + /* Make the mapping visible to the host. */ + iobj->bar = bar; + iobj->map = ioremap_wc(device->func->resource_addr(device, 3) + + (u32)iobj->bar->addr, size); + if (!iobj->map) { + nvkm_warn(subdev, "PRAMIN ioremap failed\n"); + nvkm_vmm_put(vmm, &iobj->bar); + } +} + +static int +nv50_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + memory = nv50_instobj(memory)->ram; + return nvkm_memory_map(memory, offset, vmm, vma, argv, argc); } static void nv50_instobj_release(struct nvkm_memory *memory) { - struct nv50_instmem *imem = nv50_instobj(memory)->imem; - spin_unlock_irqrestore(&imem->lock, imem->lock_flags); + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_subdev *subdev = &imem->base.subdev; + + wmb(); + nvkm_bar_flush(subdev->device->bar); + + if (refcount_dec_and_mutex_lock(&iobj->maps, &subdev->mutex)) { + /* Add the now-unused mapping to the LRU instead of directly + * unmapping it here, in case we need to map it again later. + */ + if (likely(iobj->lru.next) && iobj->map) { + BUG_ON(!list_empty(&iobj->lru)); + list_add_tail(&iobj->lru, &imem->lru); + } + + /* Switch back to NULL accessors when last map is gone. 
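+ * Any access after this point has to come back through + * acquire(), which re-installs either the fast (BAR2) or the + * slow (PRAMIN window) accessors for the new mapping state.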
*/ + iobj->base.memory.ptrs = NULL; + mutex_unlock(&subdev->mutex); + } } static void __iomem * nv50_instobj_acquire(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_bar *bar = imem->base.subdev.device->bar; - struct nvkm_vm *vm; - unsigned long flags; + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vmm *vmm; + void __iomem *map = NULL; - if (!iobj->map && (vm = nvkm_bar_kmap(bar))) - nvkm_memory_boot(memory, vm); - if (!IS_ERR_OR_NULL(iobj->map)) + /* Already mapped? */ + if (refcount_inc_not_zero(&iobj->maps)) return iobj->map; - spin_lock_irqsave(&imem->lock, flags); - imem->lock_flags = flags; - return NULL; -} + /* Take the lock, and re-check that another thread hasn't + * already mapped the object in the meantime. + */ + mutex_lock(&imem->subdev.mutex); + if (refcount_inc_not_zero(&iobj->maps)) { + mutex_unlock(&imem->subdev.mutex); + return iobj->map; + } -static u32 -nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) -{ - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - u32 data; + /* Attempt to get a direct CPU mapping of the object. */ + if ((vmm = nvkm_bar_bar2_vmm(imem->subdev.device))) { + if (!iobj->map) + nv50_instobj_kmap(iobj, vmm); + map = iobj->map; + } - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + if (!refcount_inc_not_zero(&iobj->maps)) { + /* Exclude object from eviction while it's being accessed. */ + if (likely(iobj->lru.next)) + list_del_init(&iobj->lru); + + if (map) + iobj->base.memory.ptrs = &nv50_instobj_fast; + else + iobj->base.memory.ptrs = &nv50_instobj_slow; + refcount_inc(&iobj->maps); } - data = nvkm_rd32(device, 0x700000 + addr); - return data; + + mutex_unlock(&imem->subdev.mutex); + return map; } static void -nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vmm *vmm) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + struct nvkm_instmem *imem = &iobj->imem->base; + + /* Exclude bootstrapped objects (ie. the page tables for the + * instmem BAR itself) from eviction. 
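+ * Clearing lru.next below acts as a sentinel: acquire(), + * release() and the destructor all test it before touching + * the LRU, so a booted object can never be picked for eviction.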
+ */ + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) { + list_del_init(&iobj->lru); + iobj->lru.next = NULL; } - nvkm_wr32(device, 0x700000 + addr, data); + + nv50_instobj_kmap(iobj, vmm); + nvkm_instmem_boot(imem); + mutex_unlock(&imem->subdev.mutex); } -static void -nv50_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) +static u64 +nv50_instobj_size(struct nvkm_memory *memory) { - struct nv50_instobj *iobj = nv50_instobj(memory); - nvkm_vm_map_at(vma, offset, iobj->mem); + return nvkm_memory_size(nv50_instobj(memory)->ram); +} + +static u64 +nv50_instobj_addr(struct nvkm_memory *memory) +{ + return nvkm_memory_addr(nv50_instobj(memory)->ram); +} + +static enum nvkm_memory_target +nv50_instobj_target(struct nvkm_memory *memory) +{ + return nvkm_memory_target(nv50_instobj(memory)->ram); } static void * nv50_instobj_dtor(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_ram *ram = iobj->imem->base.subdev.device->fb->ram; - if (!IS_ERR_OR_NULL(iobj->map)) { - nvkm_vm_put(&iobj->bar); - iounmap(iobj->map); + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vma *bar; + void *map = map; + + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) + list_del(&iobj->lru); + map = iobj->map; + bar = iobj->bar; + mutex_unlock(&imem->subdev.mutex); + + if (map) { + struct nvkm_vmm *vmm = nvkm_bar_bar2_vmm(imem->subdev.device); + iounmap(map); + if (likely(vmm)) /* Can be NULL during BAR destructor. */ + nvkm_vmm_put(vmm, &bar); } - ram->func->put(ram, &iobj->mem); + + nvkm_memory_unref(&iobj->ram); + nvkm_instobj_dtor(imem, &iobj->base); return iobj; } @@ -184,8 +330,6 @@ nv50_instobj_func = { .boot = nv50_instobj_boot, .acquire = nv50_instobj_acquire, .release = nv50_instobj_release, - .rd32 = nv50_instobj_rd32, - .wr32 = nv50_instobj_wr32, .map = nv50_instobj_map, }; @@ -195,25 +339,19 @@ nv50_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, { struct nv50_instmem *imem = nv50_instmem(base); struct nv50_instobj *iobj; - struct nvkm_ram *ram = imem->base.subdev.device->fb->ram; - int ret; + struct nvkm_device *device = imem->base.subdev.device; + u8 page = max(order_base_2(align), 12); if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv50_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv50_instobj_func, &imem->base, &iobj->base); iobj->imem = imem; + refcount_set(&iobj->maps, 0); + INIT_LIST_HEAD(&iobj->lru); - size = max((size + 4095) & ~4095, (u32)4096); - align = max((align + 4095) & ~4095, (u32)4096); - - ret = ram->func->get(ram, size, align, 0, 0x800, &iobj->mem); - if (ret) - return ret; - - iobj->mem->page_shift = 12; - return 0; + return nvkm_ram_get(device, 0, 1, page, size, true, true, &iobj->ram); } /****************************************************************************** @@ -230,7 +368,6 @@ static const struct nvkm_instmem_func nv50_instmem = { .fini = nv50_instmem_fini, .memory_new = nv50_instobj_new, - .persistent = false, .zero = false, }; @@ -243,7 +380,7 @@ nv50_instmem_new(struct nvkm_device *device, int index, if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL))) return -ENOMEM; nvkm_instmem_ctor(&nv50_instmem, device, index, &imem->base); - spin_lock_init(&imem->lock); + INIT_LIST_HEAD(&imem->lru); *pimem = &imem->base; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h index 
ace4471864a3..44651ca42d52 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h @@ -11,10 +11,22 @@ struct nvkm_instmem_func { void (*wr32)(struct nvkm_instmem *, u32 addr, u32 data); int (*memory_new)(struct nvkm_instmem *, u32 size, u32 align, bool zero, struct nvkm_memory **); - bool persistent; bool zero; }; void nvkm_instmem_ctor(const struct nvkm_instmem_func *, struct nvkm_device *, int index, struct nvkm_instmem *); +void nvkm_instmem_boot(struct nvkm_instmem *); + +#include <core/memory.h> + +struct nvkm_instobj { + struct nvkm_memory memory; + struct list_head head; + u32 *suspend; +}; + +void nvkm_instobj_ctor(const struct nvkm_memory_func *func, + struct nvkm_instmem *, struct nvkm_instobj *); +void nvkm_instobj_dtor(struct nvkm_instmem *, struct nvkm_instobj *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 0c7ef250dcaf..1f185274d3e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -23,26 +23,12 @@ */ #include "priv.h" -#include <subdev/fb.h> - -int -nvkm_ltc_tags_alloc(struct nvkm_ltc *ltc, u32 n, struct nvkm_mm_node **pnode) -{ - int ret = nvkm_mm_head(<c->tags, 0, 1, n, n, 1, pnode); - if (ret) - *pnode = NULL; - return ret; -} - -void -nvkm_ltc_tags_free(struct nvkm_ltc *ltc, struct nvkm_mm_node **pnode) -{ - nvkm_mm_free(<c->tags, pnode); -} +#include <core/memory.h> void -nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count) +nvkm_ltc_tags_clear(struct nvkm_device *device, u32 first, u32 count) { + struct nvkm_ltc *ltc = device->ltc; const u32 limit = first + count - 1; BUG_ON((first > limit) || (limit >= ltc->num_tags)); @@ -116,10 +102,7 @@ static void * nvkm_ltc_dtor(struct nvkm_subdev *subdev) { struct nvkm_ltc *ltc = nvkm_ltc(subdev); - struct nvkm_ram *ram = ltc->subdev.device->fb->ram; - nvkm_mm_fini(<c->tags); - if (ram) - nvkm_mm_free(&ram->vram, <c->tag_ram); + nvkm_memory_unref(<c->tag_ram); return ltc; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c index 4a0fa0a9b802..a21ef45b8572 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c @@ -23,6 +23,7 @@ */ #include "priv.h" +#include <core/memory.h> #include <subdev/fb.h> #include <subdev/timer.h> @@ -152,7 +153,10 @@ gf100_ltc_flush(struct nvkm_ltc *ltc) int gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) { - struct nvkm_ram *ram = ltc->subdev.device->fb->ram; + struct nvkm_device *device = ltc->subdev.device; + struct nvkm_fb *fb = device->fb; + struct nvkm_ram *ram = fb->ram; + u32 bits = (nvkm_rd32(device, 0x100c80) & 0x00001000) ? 
16 : 17; u32 tag_size, tag_margin, tag_align; int ret; @@ -164,8 +168,8 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */ ltc->num_tags = (ram->size >> 17) / 4; - if (ltc->num_tags > (1 << 17)) - ltc->num_tags = 1 << 17; /* we have 17 bits in PTE */ + if (ltc->num_tags > (1 << bits)) + ltc->num_tags = 1 << bits; /* we have 16/17 bits in PTE */ ltc->num_tags = (ltc->num_tags + 63) & ~63; /* round up to 64 */ tag_align = ltc->ltc_nr * 0x800; @@ -181,14 +185,13 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) */ tag_size = (ltc->num_tags / 64) * 0x6000 + tag_margin; tag_size += tag_align; - tag_size = (tag_size + 0xfff) >> 12; /* round up */ - ret = nvkm_mm_tail(&ram->vram, 1, 1, tag_size, tag_size, 1, - <c->tag_ram); + ret = nvkm_ram_get(device, NVKM_RAM_MM_NORMAL, 0x01, 12, tag_size, + true, true, <c->tag_ram); if (ret) { ltc->num_tags = 0; } else { - u64 tag_base = ((u64)ltc->tag_ram->offset << 12) + tag_margin; + u64 tag_base = nvkm_memory_addr(ltc->tag_ram) + tag_margin; tag_base += tag_align - 1; do_div(tag_base, tag_align); @@ -197,7 +200,8 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) } mm_init: - return nvkm_mm_init(<c->tags, 0, ltc->num_tags, 1); + nvkm_mm_fini(&fb->tags); + return nvkm_mm_init(&fb->tags, 0, 0, ltc->num_tags, 1); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c index 0bdfb2f40266..e34d42108019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c @@ -45,7 +45,7 @@ gp100_ltc_oneinit(struct nvkm_ltc *ltc) ltc->ltc_nr = nvkm_rd32(device, 0x12006c); ltc->lts_nr = nvkm_rd32(device, 0x17e280) >> 28; /*XXX: tagram allocation - TBD */ - return nvkm_mm_init(<c->tags, 0, 0, 1); + return 0; } static void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 012c9db687b2..352a65f9371c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -3,4 +3,33 @@ nvkm-y += nvkm/subdev/mmu/nv04.o nvkm-y += nvkm/subdev/mmu/nv41.o nvkm-y += nvkm/subdev/mmu/nv44.o nvkm-y += nvkm/subdev/mmu/nv50.o +nvkm-y += nvkm/subdev/mmu/g84.o nvkm-y += nvkm/subdev/mmu/gf100.o +nvkm-y += nvkm/subdev/mmu/gk104.o +nvkm-y += nvkm/subdev/mmu/gk20a.o +nvkm-y += nvkm/subdev/mmu/gm200.o +nvkm-y += nvkm/subdev/mmu/gm20b.o +nvkm-y += nvkm/subdev/mmu/gp100.o +nvkm-y += nvkm/subdev/mmu/gp10b.o + +nvkm-y += nvkm/subdev/mmu/mem.o +nvkm-y += nvkm/subdev/mmu/memnv04.o +nvkm-y += nvkm/subdev/mmu/memnv50.o +nvkm-y += nvkm/subdev/mmu/memgf100.o + +nvkm-y += nvkm/subdev/mmu/vmm.o +nvkm-y += nvkm/subdev/mmu/vmmnv04.o +nvkm-y += nvkm/subdev/mmu/vmmnv41.o +nvkm-y += nvkm/subdev/mmu/vmmnv44.o +nvkm-y += nvkm/subdev/mmu/vmmnv50.o +nvkm-y += nvkm/subdev/mmu/vmmgf100.o +nvkm-y += nvkm/subdev/mmu/vmmgk104.o +nvkm-y += nvkm/subdev/mmu/vmmgk20a.o +nvkm-y += nvkm/subdev/mmu/vmmgm200.o +nvkm-y += nvkm/subdev/mmu/vmmgm20b.o +nvkm-y += nvkm/subdev/mmu/vmmgp100.o +nvkm-y += nvkm/subdev/mmu/vmmgp10b.o + +nvkm-y += nvkm/subdev/mmu/umem.o +nvkm-y += nvkm/subdev/mmu/ummu.o +nvkm-y += nvkm/subdev/mmu/uvmm.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index d06ad2c372bf..ee11ccaf0563 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -21,478 +21,367 @@ * * Authors: Ben Skeggs */ -#include "priv.h" 
+#include "ummu.h" +#include "vmm.h" -#include <core/gpuobj.h> +#include <subdev/bar.h> #include <subdev/fb.h> -void -nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) -{ - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_mm_node *r = node->mem; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - delta = 0; - while (r) { - u64 phys = (u64)r->offset << 12; - u32 num = r->length >> bits; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->map(vma, pgt, node, pte, len, phys, delta); - - num -= len; - pte += len; - if (unlikely(end >= max)) { - phys += len << (bits + 12); - pde++; - pte = 0; - } - - delta += (u64)len << vma->node->type; - } - r = r->next; - }; +#include <nvif/if500d.h> +#include <nvif/if900d.h> - mmu->func->flush(vm); -} +struct nvkm_mmu_ptp { + struct nvkm_mmu_pt *pt; + struct list_head head; + u8 shift; + u16 mask; + u16 free; +}; static void -nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length, - struct nvkm_mem *mem) +nvkm_mmu_ptp_put(struct nvkm_mmu *mmu, bool force, struct nvkm_mmu_pt *pt) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - unsigned m, sglen; - u32 end, len; - int i; - struct scatterlist *sg; - - for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - sglen = sg_dma_len(sg) >> PAGE_SHIFT; - - end = pte + sglen; - if (unlikely(end >= max)) - end = max; - len = end - pte; - - for (m = 0; m < len; m++) { - dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT); - - mmu->func->map_sg(vma, pgt, mem, pte, 1, &addr); - num--; - pte++; - - if (num == 0) - goto finish; - } - if (unlikely(end >= max)) { - pde++; - pte = 0; - } - if (m < sglen) { - for (; m < sglen; m++) { - dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT); - - mmu->func->map_sg(vma, pgt, mem, pte, 1, &addr); - num--; - pte++; - if (num == 0) - goto finish; - } - } - + const int slot = pt->base >> pt->ptp->shift; + struct nvkm_mmu_ptp *ptp = pt->ptp; + + /* If there were no free slots in the parent allocation before, + * there will be now, so return PTP to the cache. + */ + if (!ptp->free) + list_add(&ptp->head, &mmu->ptp.list); + ptp->free |= BIT(slot); + + /* If there's no more sub-allocations, destroy PTP. 
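+ * Once every slot's bit is set again (free == mask) the parent + * page has no users left, so it goes back to the page-table + * cache and the tracking structure is freed.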
*/ + if (ptp->free == ptp->mask) { + nvkm_mmu_ptc_put(mmu, force, &ptp->pt); + list_del(&ptp->head); + kfree(ptp); } -finish: - mmu->func->flush(vm); + + kfree(pt); } -static void -nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length, - struct nvkm_mem *mem) +struct nvkm_mmu_pt * +nvkm_mmu_ptp_get(struct nvkm_mmu *mmu, u32 size, bool zero) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - dma_addr_t *list = mem->pages; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->map_sg(vma, pgt, mem, pte, len, list); - - num -= len; - pte += len; - list += len; - if (unlikely(end >= max)) { - pde++; - pte = 0; + struct nvkm_mmu_pt *pt; + struct nvkm_mmu_ptp *ptp; + int slot; + + if (!(pt = kzalloc(sizeof(*pt), GFP_KERNEL))) + return NULL; + + ptp = list_first_entry_or_null(&mmu->ptp.list, typeof(*ptp), head); + if (!ptp) { + /* Need to allocate a new parent to sub-allocate from. */ + if (!(ptp = kmalloc(sizeof(*ptp), GFP_KERNEL))) { + kfree(pt); + return NULL; } - } - mmu->func->flush(vm); -} + ptp->pt = nvkm_mmu_ptc_get(mmu, 0x1000, 0x1000, false); + if (!ptp->pt) { + kfree(ptp); + kfree(pt); + return NULL; + } -void -nvkm_vm_map(struct nvkm_vma *vma, struct nvkm_mem *node) -{ - if (node->sg) - nvkm_vm_map_sg_table(vma, 0, node->size << 12, node); - else - if (node->pages) - nvkm_vm_map_sg(vma, 0, node->size << 12, node); - else - nvkm_vm_map_at(vma, 0, node); + ptp->shift = order_base_2(size); + slot = nvkm_memory_size(ptp->pt->memory) >> ptp->shift; + ptp->mask = (1 << slot) - 1; + ptp->free = ptp->mask; + list_add(&ptp->head, &mmu->ptp.list); + } + pt->ptp = ptp; + pt->sub = true; + + /* Sub-allocate from parent object, removing PTP from cache + * if there's no more free slots left. 
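+ * Free slots are tracked as a bitmask, so the lowest available + * slot is simply the first set bit, and its byte offset within + * the parent page is the slot number shifted by the PTP's shift.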
+ */ + slot = __ffs(ptp->free); + ptp->free &= ~BIT(slot); + if (!ptp->free) + list_del(&ptp->head); + + pt->memory = pt->ptp->pt->memory; + pt->base = slot << ptp->shift; + pt->addr = pt->ptp->pt->addr + pt->base; + return pt; } -void -nvkm_vm_unmap_at(struct nvkm_vma *vma, u64 delta, u64 length) +struct nvkm_mmu_ptc { + struct list_head head; + struct list_head item; + u32 size; + u32 refs; +}; + +static inline struct nvkm_mmu_ptc * +nvkm_mmu_ptc_find(struct nvkm_mmu *mmu, u32 size) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->unmap(vma, pgt, pte, len); - - num -= len; - pte += len; - if (unlikely(end >= max)) { - pde++; - pte = 0; - } + struct nvkm_mmu_ptc *ptc; + + list_for_each_entry(ptc, &mmu->ptc.list, head) { + if (ptc->size == size) + return ptc; } - mmu->func->flush(vm); -} + ptc = kmalloc(sizeof(*ptc), GFP_KERNEL); + if (ptc) { + INIT_LIST_HEAD(&ptc->item); + ptc->size = size; + ptc->refs = 0; + list_add(&ptc->head, &mmu->ptc.list); + } -void -nvkm_vm_unmap(struct nvkm_vma *vma) -{ - nvkm_vm_unmap_at(vma, 0, (u64)vma->node->length << 12); + return ptc; } -static void -nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) +void +nvkm_mmu_ptc_put(struct nvkm_mmu *mmu, bool force, struct nvkm_mmu_pt **ppt) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgd *vpgd; - struct nvkm_vm_pgt *vpgt; - struct nvkm_memory *pgt; - u32 pde; - - for (pde = fpde; pde <= lpde; pde++) { - vpgt = &vm->pgt[pde - vm->fpde]; - if (--vpgt->refcount[big]) - continue; - - pgt = vpgt->mem[big]; - vpgt->mem[big] = NULL; - - list_for_each_entry(vpgd, &vm->pgd_list, head) { - mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); + struct nvkm_mmu_pt *pt = *ppt; + if (pt) { + /* Handle sub-allocated page tables. */ + if (pt->sub) { + mutex_lock(&mmu->ptp.mutex); + nvkm_mmu_ptp_put(mmu, force, pt); + mutex_unlock(&mmu->ptp.mutex); + return; } - nvkm_memory_del(&pgt); + /* Either cache or free the object. */ + mutex_lock(&mmu->ptc.mutex); + if (pt->ptc->refs < 8 /* Heuristic. */ && !force) { + list_add_tail(&pt->head, &pt->ptc->item); + pt->ptc->refs++; + } else { + nvkm_memory_unref(&pt->memory); + kfree(pt); + } + mutex_unlock(&mmu->ptc.mutex); } } -static int -nvkm_vm_map_pgt(struct nvkm_vm *vm, u32 pde, u32 type) +struct nvkm_mmu_pt * +nvkm_mmu_ptc_get(struct nvkm_mmu *mmu, u32 size, u32 align, bool zero) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; - struct nvkm_vm_pgd *vpgd; - int big = (type != mmu->func->spg_shift); - u32 pgt_size; + struct nvkm_mmu_ptc *ptc; + struct nvkm_mmu_pt *pt; int ret; - pgt_size = (1 << (mmu->func->pgt_bits + 12)) >> type; - pgt_size *= 8; - - ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, - pgt_size, 0x1000, true, &vpgt->mem[big]); - if (unlikely(ret)) - return ret; - - list_for_each_entry(vpgd, &vm->pgd_list, head) { - mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); + /* Sub-allocated page table (ie. GP100 LPT). 
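+ * Requests aligned below the minimum 0x1000-byte allocation + * are carved out of a shared parent page by the PTP allocator + * above, rather than each consuming a full allocation.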
*/ + if (align < 0x1000) { + mutex_lock(&mmu->ptp.mutex); + pt = nvkm_mmu_ptp_get(mmu, align, zero); + mutex_unlock(&mmu->ptp.mutex); + return pt; } - vpgt->refcount[big]++; - return 0; -} - -int -nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access, - struct nvkm_vma *vma) -{ - struct nvkm_mmu *mmu = vm->mmu; - u32 align = (1 << page_shift) >> 12; - u32 msize = size >> 12; - u32 fpde, lpde, pde; - int ret; - - mutex_lock(&vm->mutex); - ret = nvkm_mm_head(&vm->mm, 0, page_shift, msize, msize, align, - &vma->node); - if (unlikely(ret != 0)) { - mutex_unlock(&vm->mutex); - return ret; + /* Lookup cache for this page table size. */ + mutex_lock(&mmu->ptc.mutex); + ptc = nvkm_mmu_ptc_find(mmu, size); + if (!ptc) { + mutex_unlock(&mmu->ptc.mutex); + return NULL; } - fpde = (vma->node->offset >> mmu->func->pgt_bits); - lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits; - - for (pde = fpde; pde <= lpde; pde++) { - struct nvkm_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; - int big = (vma->node->type != mmu->func->spg_shift); + /* If there's a free PT in the cache, reuse it. */ + pt = list_first_entry_or_null(&ptc->item, typeof(*pt), head); + if (pt) { + if (zero) + nvkm_fo64(pt->memory, 0, 0, size >> 3); + list_del(&pt->head); + ptc->refs--; + mutex_unlock(&mmu->ptc.mutex); + return pt; + } + mutex_unlock(&mmu->ptc.mutex); - if (likely(vpgt->refcount[big])) { - vpgt->refcount[big]++; - continue; - } + /* No such luck, we need to allocate. */ + if (!(pt = kmalloc(sizeof(*pt), GFP_KERNEL))) + return NULL; + pt->ptc = ptc; + pt->sub = false; - ret = nvkm_vm_map_pgt(vm, pde, vma->node->type); - if (ret) { - if (pde != fpde) - nvkm_vm_unmap_pgt(vm, big, fpde, pde - 1); - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - return ret; - } + ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, + size, align, zero, &pt->memory); + if (ret) { + kfree(pt); + return NULL; } - mutex_unlock(&vm->mutex); - vma->vm = NULL; - nvkm_vm_ref(vm, &vma->vm, NULL); - vma->offset = (u64)vma->node->offset << 12; - vma->access = access; - return 0; + pt->base = 0; + pt->addr = nvkm_memory_addr(pt->memory); + return pt; } void -nvkm_vm_put(struct nvkm_vma *vma) +nvkm_mmu_ptc_dump(struct nvkm_mmu *mmu) { - struct nvkm_mmu *mmu; - struct nvkm_vm *vm; - u32 fpde, lpde; - - if (unlikely(vma->node == NULL)) - return; - vm = vma->vm; - mmu = vm->mmu; - - fpde = (vma->node->offset >> mmu->func->pgt_bits); - lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits; - - mutex_lock(&vm->mutex); - nvkm_vm_unmap_pgt(vm, vma->node->type != mmu->func->spg_shift, fpde, lpde); - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - - nvkm_vm_ref(NULL, &vma->vm, NULL); -} - -int -nvkm_vm_boot(struct nvkm_vm *vm, u64 size) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_memory *pgt; - int ret; - - ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, - (size >> mmu->func->spg_shift) * 8, 0x1000, true, &pgt); - if (ret == 0) { - vm->pgt[0].refcount[0] = 1; - vm->pgt[0].mem[0] = pgt; - nvkm_memory_boot(pgt, vm); + struct nvkm_mmu_ptc *ptc; + list_for_each_entry(ptc, &mmu->ptc.list, head) { + struct nvkm_mmu_pt *pt, *tt; + list_for_each_entry_safe(pt, tt, &ptc->item, head) { + nvkm_memory_unref(&pt->memory); + list_del(&pt->head); + kfree(pt); + } } - - return ret; } -int -nvkm_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - u32 block, struct lock_class_key *key, struct nvkm_vm **pvm) +static void 
+nvkm_mmu_ptc_fini(struct nvkm_mmu *mmu) { - static struct lock_class_key _key; - struct nvkm_vm *vm; - u64 mm_length = (offset + length) - mm_offset; - int ret; - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) - return -ENOMEM; - - __mutex_init(&vm->mutex, "&vm->mutex", key ? key : &_key); - INIT_LIST_HEAD(&vm->pgd_list); - vm->mmu = mmu; - kref_init(&vm->refcount); - vm->fpde = offset >> (mmu->func->pgt_bits + 12); - vm->lpde = (offset + length - 1) >> (mmu->func->pgt_bits + 12); - - vm->pgt = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt)); - if (!vm->pgt) { - kfree(vm); - return -ENOMEM; - } + struct nvkm_mmu_ptc *ptc, *ptct; - ret = nvkm_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12, - block >> 12); - if (ret) { - vfree(vm->pgt); - kfree(vm); - return ret; + list_for_each_entry_safe(ptc, ptct, &mmu->ptc.list, head) { + WARN_ON(!list_empty(&ptc->item)); + list_del(&ptc->head); + kfree(ptc); } - - *pvm = vm; - - return 0; } -int -nvkm_vm_new(struct nvkm_device *device, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) +static void +nvkm_mmu_ptc_init(struct nvkm_mmu *mmu) { - struct nvkm_mmu *mmu = device->mmu; - if (!mmu->func->create) - return -EINVAL; - return mmu->func->create(mmu, offset, length, mm_offset, key, pvm); + mutex_init(&mmu->ptc.mutex); + INIT_LIST_HEAD(&mmu->ptc.list); + mutex_init(&mmu->ptp.mutex); + INIT_LIST_HEAD(&mmu->ptp.list); } -static int -nvkm_vm_link(struct nvkm_vm *vm, struct nvkm_gpuobj *pgd) +static void +nvkm_mmu_type(struct nvkm_mmu *mmu, int heap, u8 type) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgd *vpgd; - int i; - - if (!pgd) - return 0; - - vpgd = kzalloc(sizeof(*vpgd), GFP_KERNEL); - if (!vpgd) - return -ENOMEM; - - vpgd->obj = pgd; - - mutex_lock(&vm->mutex); - for (i = vm->fpde; i <= vm->lpde; i++) - mmu->func->map_pgt(pgd, i, vm->pgt[i - vm->fpde].mem); - list_add(&vpgd->head, &vm->pgd_list); - mutex_unlock(&vm->mutex); - return 0; + if (heap >= 0 && !WARN_ON(mmu->type_nr == ARRAY_SIZE(mmu->type))) { + mmu->type[mmu->type_nr].type = type | mmu->heap[heap].type; + mmu->type[mmu->type_nr].heap = heap; + mmu->type_nr++; + } } -static void -nvkm_vm_unlink(struct nvkm_vm *vm, struct nvkm_gpuobj *mpgd) +static int +nvkm_mmu_heap(struct nvkm_mmu *mmu, u8 type, u64 size) { - struct nvkm_vm_pgd *vpgd, *tmp; - - if (!mpgd) - return; - - mutex_lock(&vm->mutex); - list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { - if (vpgd->obj == mpgd) { - list_del(&vpgd->head); - kfree(vpgd); - break; + if (size) { + if (!WARN_ON(mmu->heap_nr == ARRAY_SIZE(mmu->heap))) { + mmu->heap[mmu->heap_nr].type = type; + mmu->heap[mmu->heap_nr].size = size; + return mmu->heap_nr++; } } - mutex_unlock(&vm->mutex); + return -EINVAL; } static void -nvkm_vm_del(struct kref *kref) +nvkm_mmu_host(struct nvkm_mmu *mmu) { - struct nvkm_vm *vm = container_of(kref, typeof(*vm), refcount); - struct nvkm_vm_pgd *vpgd, *tmp; - - list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { - nvkm_vm_unlink(vm, vpgd->obj); - } - - nvkm_mm_fini(&vm->mm); - vfree(vm->pgt); - kfree(vm); + struct nvkm_device *device = mmu->subdev.device; + u8 type = NVKM_MEM_KIND * !!mmu->func->kind_sys; + int heap; + + /* Non-mappable system memory. */ + heap = nvkm_mmu_heap(mmu, NVKM_MEM_HOST, ~0ULL); + nvkm_mmu_type(mmu, heap, type); + + /* Non-coherent, cached, system memory. 
+ * + * Block-linear mappings of system memory must be done through + * BAR1, and cannot be supported on systems where we're unable + * to map BAR1 with write-combining. + */ + type |= NVKM_MEM_MAPPABLE; + if (!device->bar || device->bar->iomap_uncached) + nvkm_mmu_type(mmu, heap, type & ~NVKM_MEM_KIND); + else + nvkm_mmu_type(mmu, heap, type); + + /* Coherent, cached, system memory. + * + * Unsupported on systems that aren't able to support snooped + * mappings, and also for block-linear mappings which must be + * done through BAR1. + */ + type |= NVKM_MEM_COHERENT; + if (device->func->cpu_coherent) + nvkm_mmu_type(mmu, heap, type & ~NVKM_MEM_KIND); + + /* Uncached system memory. */ + nvkm_mmu_type(mmu, heap, type |= NVKM_MEM_UNCACHED); } -int -nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_gpuobj *pgd) +static void +nvkm_mmu_vram(struct nvkm_mmu *mmu) { - if (ref) { - int ret = nvkm_vm_link(ref, pgd); - if (ret) - return ret; - - kref_get(&ref->refcount); - } + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_mm *mm = &device->fb->ram->vram; + const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + u8 type = NVKM_MEM_KIND * !!mmu->func->kind; + u8 heap = NVKM_MEM_VRAM; + int heapM, heapN, heapU; + + /* Mixed-memory doesn't support compression or display. */ + heapM = nvkm_mmu_heap(mmu, heap, sizeM << NVKM_RAM_MM_SHIFT); + + heap |= NVKM_MEM_COMP; + heap |= NVKM_MEM_DISP; + heapN = nvkm_mmu_heap(mmu, heap, sizeN << NVKM_RAM_MM_SHIFT); + heapU = nvkm_mmu_heap(mmu, heap, sizeU << NVKM_RAM_MM_SHIFT); + + /* Add non-mappable VRAM types first so that they're preferred + * over anything else. Mixed-memory will be slower than other + * heaps, it's prioritised last. + */ + nvkm_mmu_type(mmu, heapU, type); + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); + + /* Add host memory types next, under the assumption that users + * wanting mappable memory want to use them as staging buffers + * or the like. + */ + nvkm_mmu_host(mmu); + + /* Mappable VRAM types go last, as they're basically the worst + * possible type to ask for unless there's no other choice. + */ + if (device->bar) { + /* Write-combined BAR1 access. */ + type |= NVKM_MEM_MAPPABLE; + if (!device->bar->iomap_uncached) { + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); + } - if (*ptr) { - nvkm_vm_unlink(*ptr, pgd); - kref_put(&(*ptr)->refcount, nvkm_vm_del); + /* Uncached BAR1 access. */ + type |= NVKM_MEM_COHERENT; + type |= NVKM_MEM_UNCACHED; + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); } - - *ptr = ref; - return 0; } static int nvkm_mmu_oneinit(struct nvkm_subdev *subdev) { struct nvkm_mmu *mmu = nvkm_mmu(subdev); - if (mmu->func->oneinit) - return mmu->func->oneinit(mmu); + + /* Determine available memory types. 
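+ * Boards with VRAM (fb->ram) register both VRAM and host heaps; + * boards without it expose host memory types only.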
*/ + if (mmu->subdev.device->fb && mmu->subdev.device->fb->ram) + nvkm_mmu_vram(mmu); + else + nvkm_mmu_host(mmu); + + if (mmu->func->vmm.global) { + int ret = nvkm_vmm_new(subdev->device, 0, 0, NULL, 0, NULL, + "gart", &mmu->vmm); + if (ret) + return ret; + } + return 0; } @@ -509,8 +398,10 @@ static void * nvkm_mmu_dtor(struct nvkm_subdev *subdev) { struct nvkm_mmu *mmu = nvkm_mmu(subdev); - if (mmu->func->dtor) - return mmu->func->dtor(mmu); + + nvkm_vmm_unref(&mmu->vmm); + + nvkm_mmu_ptc_fini(mmu); return mmu; } @@ -527,9 +418,10 @@ nvkm_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device, { nvkm_subdev_ctor(&nvkm_mmu, device, index, &mmu->subdev); mmu->func = func; - mmu->limit = func->limit; mmu->dma_bits = func->dma_bits; - mmu->lpg_shift = func->lpg_shift; + nvkm_mmu_ptc_init(mmu); + mmu->user.ctor = nvkm_ummu_new; + mmu->user.base = func->mmu.user; } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c new file mode 100644 index 000000000000..8accda5a772b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +g84_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, nv50_vmm_new, false, 0x0200 }, + .kind = nv50_mmu_kind, + .kind_sys = true, +}; + +int +g84_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&g84_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c index 7ac507c927bb..2d075246dc46 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c @@ -21,197 +21,65 @@ * * Authors: Ben Skeggs */ -#include "priv.h" +#include "mem.h" +#include "vmm.h" -#include <subdev/fb.h> -#include <subdev/ltc.h> -#include <subdev/timer.h> - -#include <core/gpuobj.h> +#include <nvif/class.h> /* Map from compressed to corresponding uncompressed storage type. * The value 0xff represents an invalid storage type. 
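 * Indexing the table with a kind value yields its uncompressed  * equivalent, used (eg.) for system memory, where compressed  * storage types are invalid.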
*/ -const u8 gf100_pte_storage_type_map[256] = -{ - 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ - 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ - 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ - 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ - 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, - 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ - 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ - 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ - 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, - 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, - 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ - 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, - 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ - 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ - 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff -}; - - -static void -gf100_vm_map_pgt(struct nvkm_gpuobj *pgd, u32 index, struct nvkm_memory *pgt[2]) -{ - u32 pde[2] = { 0, 0 }; - - if (pgt[0]) - pde[1] = 0x00000001 | (nvkm_memory_addr(pgt[0]) >> 8); - if (pgt[1]) - pde[0] = 0x00000001 | (nvkm_memory_addr(pgt[1]) >> 8); - - nvkm_kmap(pgd); - nvkm_wo32(pgd, (index * 8) + 0, pde[0]); - nvkm_wo32(pgd, (index * 8) + 4, pde[1]); - nvkm_done(pgd); -} - -static inline u64 -gf100_vm_addr(struct nvkm_vma *vma, u64 phys, u32 memtype, u32 target) -{ - phys >>= 8; - - phys |= 0x00000001; /* present */ - if (vma->access & NV_MEM_ACCESS_SYS) - phys |= 0x00000002; - - phys |= ((u64)target << 32); - phys |= ((u64)memtype << 36); - return phys; -} - -static void -gf100_vm_map(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta) -{ - u64 next = 1 << (vma->node->type - 8); - - phys = gf100_vm_addr(vma, phys, mem->memtype, 0); - pte <<= 3; - - if (mem->tag) { - struct nvkm_ltc *ltc = vma->vm->mmu->subdev.device->ltc; - u32 tag = mem->tag->offset + (delta >> 17); - phys |= (u64)tag << (32 + 12); - next |= (u64)1 << (32 + 12); - nvkm_ltc_tags_clear(ltc, tag, cnt); - } - - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - phys += next; - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 
7 : 5; - /* compressed storage types are invalid for system memory */ - u32 memtype = gf100_pte_storage_type_map[mem->memtype & 0xff]; - - nvkm_kmap(pgt); - pte <<= 3; - while (cnt--) { - u64 phys = gf100_vm_addr(vma, *list++, memtype, target); - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - nvkm_kmap(pgt); - pte <<= 3; - while (cnt--) { - nvkm_wo32(pgt, pte + 0, 0x00000000); - nvkm_wo32(pgt, pte + 4, 0x00000000); - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_flush(struct nvkm_vm *vm) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_device *device = mmu->subdev.device; - struct nvkm_vm_pgd *vpgd; - u32 type; - - type = 0x00000001; /* PAGE_ALL */ - if (atomic_read(&vm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ - - mutex_lock(&mmu->subdev.mutex); - list_for_each_entry(vpgd, &vm->pgd_list, head) { - /* looks like maybe a "free flush slots" counter, the - * faster you write to 0x100cbc to more it decreases - */ - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00ff0000) - break; - ); - - nvkm_wr32(device, 0x100cb8, vpgd->obj->addr >> 8); - nvkm_wr32(device, 0x100cbc, 0x80000000 | type); - - /* wait for flush to be queued? */ - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00008000) - break; - ); - } - mutex_unlock(&mmu->subdev.mutex); -} - -static int -gf100_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) +const u8 * +gf100_mmu_kind(struct nvkm_mmu *mmu, int *count) { - return nvkm_vm_create(mmu, offset, length, mm_offset, 4096, key, pvm); + static const u8 + kind[256] = { + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ + 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ + 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ + 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ + 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ + 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ + 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, + 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, + 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, + 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ + 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ + 
0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff + }; + + *count = ARRAY_SIZE(kind); + return kind; } static const struct nvkm_mmu_func gf100_mmu = { - .limit = (1ULL << 40), .dma_bits = 40, - .pgt_bits = 27 - 12, - .spg_shift = 12, - .lpg_shift = 17, - .create = gf100_vm_create, - .map_pgt = gf100_vm_map_pgt, - .map = gf100_vm_map, - .map_sg = gf100_vm_map_sg, - .unmap = gf100_vm_unmap, - .flush = gf100_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gf100_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c new file mode 100644 index 000000000000..3d7d1eb1cff9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gk104_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gk104_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, +}; + +int +gk104_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&gk104_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c new file mode 100644 index 000000000000..ac74965a60d4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gk20a_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gk20a_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, +}; + +int +gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c new file mode 100644 index 000000000000..dbf644ebac97 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c @@ -0,0 +1,97 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "mem.h" +#include "vmm.h" + +#include <subdev/fb.h> + +#include <nvif/class.h> + +const u8 * +gm200_mmu_kind(struct nvkm_mmu *mmu, int *count) +{ + static const u8 + kind[256] = { + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ + 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ + 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ + 0x28, 0x29, 0x2a, 0x2b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ + 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ + 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ + 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ + 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, + 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, + 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, + 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ + 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ + 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff + }; + *count = ARRAY_SIZE(kind); + return kind; +} + +static const struct nvkm_mmu_func +gm200_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GM200}, gm200_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +static const struct nvkm_mmu_func +gm200_mmu_fixed = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GM200}, gm200_vmm_new_fixed }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gm200_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (device->fb->page) + return nvkm_mmu_new_(&gm200_mmu_fixed, device, index, pmmu); + return nvkm_mmu_new_(&gm200_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c new file mode 100644 index 000000000000..7353a94b4091 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c @@ -0,0 +1,55 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <subdev/fb.h> + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gm20b_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GM200}, gm20b_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +static const struct nvkm_mmu_func +gm20b_mmu_fixed = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GM200}, gm20b_vmm_new_fixed }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (device->fb->page) + return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); + return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c index 9df1fea8e971..651b8805c67c 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2017 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,29 +19,27 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ +#include "mem.h" +#include "vmm.h" -/* - * radeon_kfd.h defines the private interface between the - * AMD kernel graphics drivers and the AMD KFD. 
- */ - -#ifndef RADEON_KFD_H_INCLUDED -#define RADEON_KFD_H_INCLUDED - -#include <linux/types.h> -#include "kgd_kfd_interface.h" - -struct radeon_device; +#include <core/option.h> -int radeon_kfd_init(void); -void radeon_kfd_fini(void); +#include <nvif/class.h> -void radeon_kfd_suspend(struct radeon_device *rdev); -int radeon_kfd_resume(struct radeon_device *rdev); -void radeon_kfd_interrupt(struct radeon_device *rdev, - const void *ih_ring_entry); -void radeon_kfd_device_probe(struct radeon_device *rdev); -void radeon_kfd_device_init(struct radeon_device *rdev); -void radeon_kfd_device_fini(struct radeon_device *rdev); +static const struct nvkm_mmu_func +gp100_mmu = { + .dma_bits = 47, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; -#endif /* RADEON_KFD_H_INCLUDED */ +int +gp100_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm200_mmu_new(device, index, pmmu); + return nvkm_mmu_new_(&gp100_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c new file mode 100644 index 000000000000..3bd3db31e0bb --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c @@ -0,0 +1,45 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "mem.h" +#include "vmm.h" + +#include <core/option.h> + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gp10b_mmu = { + .dma_bits = 47, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm20b_mmu_new(device, index, pmmu); + return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c new file mode 100644 index 000000000000..39808489f21d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c @@ -0,0 +1,242 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#define nvkm_mem(p) container_of((p), struct nvkm_mem, memory) +#include "mem.h" + +#include <core/memory.h> + +#include <nvif/if000a.h> +#include <nvif/unpack.h> + +struct nvkm_mem { + struct nvkm_memory memory; + enum nvkm_memory_target target; + struct nvkm_mmu *mmu; + u64 pages; + struct page **mem; + union { + struct scatterlist *sgl; + dma_addr_t *dma; + }; +}; + +static enum nvkm_memory_target +nvkm_mem_target(struct nvkm_memory *memory) +{ + return nvkm_mem(memory)->target; +} + +static u8 +nvkm_mem_page(struct nvkm_memory *memory) +{ + return PAGE_SHIFT; +} + +static u64 +nvkm_mem_addr(struct nvkm_memory *memory) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->pages == 1 && mem->mem) + return mem->dma[0]; + return ~0ULL; +} + +static u64 +nvkm_mem_size(struct nvkm_memory *memory) +{ + return nvkm_mem(memory)->pages << PAGE_SHIFT; +} + +static int +nvkm_mem_map_dma(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + struct nvkm_vmm_map map = { + .memory = &mem->memory, + .offset = offset, + .dma = mem->dma, + }; + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static void * +nvkm_mem_dtor(struct nvkm_memory *memory) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->mem) { + while (mem->pages--) { + dma_unmap_page(mem->mmu->subdev.device->dev, + mem->dma[mem->pages], PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(mem->mem[mem->pages]); + } + kvfree(mem->dma); + kvfree(mem->mem); + } + return mem; +} + +static const struct nvkm_memory_func +nvkm_mem_dma = { + .dtor = nvkm_mem_dtor, + .target = nvkm_mem_target, + .page = nvkm_mem_page, + .addr = nvkm_mem_addr, + .size = nvkm_mem_size, + .map = nvkm_mem_map_dma, +}; + +static int +nvkm_mem_map_sgl(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + struct nvkm_vmm_map map = { + .memory = &mem->memory, + .offset = offset, + .sgl = mem->sgl, + }; + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static const struct nvkm_memory_func +nvkm_mem_sgl = { + .dtor = nvkm_mem_dtor, + .target = nvkm_mem_target, + .page = nvkm_mem_page, + .addr = nvkm_mem_addr, + .size = nvkm_mem_size, + .map = nvkm_mem_map_sgl, +}; + +int +nvkm_mem_map_host(struct nvkm_memory *memory, void **pmap) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->mem) { + *pmap = vmap(mem->mem, mem->pages, VM_MAP, PAGE_KERNEL); + return *pmap ? 
0 : -EFAULT; + } + return -EINVAL; +} + +static int +nvkm_mem_new_host(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + struct device *dev = mmu->subdev.device->dev; + union { + struct nvif_mem_ram_vn vn; + struct nvif_mem_ram_v0 v0; + } *args = argv; + int ret = -ENOSYS; + enum nvkm_memory_target target; + struct nvkm_mem *mem; + gfp_t gfp = GFP_USER | __GFP_ZERO; + + if ( (mmu->type[type].type & NVKM_MEM_COHERENT) && + !(mmu->type[type].type & NVKM_MEM_UNCACHED)) + target = NVKM_MEM_TARGET_HOST; + else + target = NVKM_MEM_TARGET_NCOH; + + if (page != PAGE_SHIFT) + return -EINVAL; + + if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL))) + return -ENOMEM; + mem->target = target; + mem->mmu = mmu; + *pmemory = &mem->memory; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if (args->v0.dma) { + nvkm_memory_ctor(&nvkm_mem_dma, &mem->memory); + mem->dma = args->v0.dma; + } else { + nvkm_memory_ctor(&nvkm_mem_sgl, &mem->memory); + mem->sgl = args->v0.sgl; + } + + if (!IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + mem->pages = size >> PAGE_SHIFT; + return 0; + } else + if ( (ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + kfree(mem); + return ret; + } + + nvkm_memory_ctor(&nvkm_mem_dma, &mem->memory); + size = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; + + if (!(mem->mem = kvmalloc(sizeof(*mem->mem) * size, GFP_KERNEL))) + return -ENOMEM; + if (!(mem->dma = kvmalloc(sizeof(*mem->dma) * size, GFP_KERNEL))) + return -ENOMEM; + + if (mmu->dma_bits > 32) + gfp |= GFP_HIGHUSER; + else + gfp |= GFP_DMA32; + + for (mem->pages = 0; size; size--, mem->pages++) { + struct page *p = alloc_page(gfp); + if (!p) + return -ENOMEM; + + mem->dma[mem->pages] = dma_map_page(mmu->subdev.device->dev, + p, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, mem->dma[mem->pages])) { + __free_page(p); + return -ENOMEM; + } + + mem->mem[mem->pages] = p; + } + + return 0; +} + +int +nvkm_mem_new_type(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + struct nvkm_memory *memory = NULL; + int ret; + + if (mmu->type[type].type & NVKM_MEM_VRAM) { + ret = mmu->func->mem.vram(mmu, type, page, size, + argv, argc, &memory); + } else { + ret = nvkm_mem_new_host(mmu, type, page, size, + argv, argc, &memory); + } + + if (ret) + nvkm_memory_unref(&memory); + *pmemory = memory; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h new file mode 100644 index 000000000000..234267e1b215 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h @@ -0,0 +1,23 @@ +#ifndef __NVKM_MEM_H__ +#define __NVKM_MEM_H__ +#include "priv.h" + +int nvkm_mem_new_type(struct nvkm_mmu *, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **); +int nvkm_mem_map_host(struct nvkm_memory *, void **pmap); + +int nv04_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int nv04_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); + +int nv50_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int nv50_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); + +int gf100_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int gf100_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); +#endif 
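
The host-backed memory path added in mem.c above allocates backing store one page at a time: nvkm_mem_new_host() pairs alloc_page() with dma_map_page() for every page, and nvkm_mem_dtor() later walks the arrays backwards, unmapping and freeing each page. A minimal stand-alone sketch of that pattern follows, with the failure unwind done inline for clarity; the helper name demo_alloc_dma_pages and its signature are hypothetical and not part of this patch.

	/*
	 * Illustrative sketch only -- not part of the patch.  Mirrors the
	 * alloc_page()/dma_map_page()/dma_mapping_error() pattern used by
	 * nvkm_mem_new_host(), with the page-by-page unwind that the real
	 * code defers to nvkm_mem_dtor().
	 */
	#include <linux/dma-mapping.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>

	static int
	demo_alloc_dma_pages(struct device *dev, unsigned long npages,
			     struct page **pages, dma_addr_t *dma)
	{
		unsigned long i;

		for (i = 0; i < npages; i++) {
			/* zeroed user pages, as in nvkm_mem_new_host() */
			struct page *p = alloc_page(GFP_USER | __GFP_ZERO);
			if (!p)
				goto undo;

			dma[i] = dma_map_page(dev, p, 0, PAGE_SIZE,
					      DMA_BIDIRECTIONAL);
			if (dma_mapping_error(dev, dma[i])) {
				__free_page(p);
				goto undo;
			}
			pages[i] = p;
		}
		return 0;

	undo:
		/* release whatever was mapped so far, newest first */
		while (i--) {
			dma_unmap_page(dev, dma[i], PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
			__free_page(pages[i]);
		}
		return -ENOMEM;
	}

In the patch itself the unwind is not open-coded like this: nvkm_mem_new_host() can return -ENOMEM mid-loop because nvkm_mem_new_type() calls nvkm_memory_unref() on failure, which runs nvkm_mem_dtor() and performs exactly this unmap/free walk. The real allocator also ORs GFP_HIGHUSER or GFP_DMA32 into the gfp mask depending on mmu->dma_bits, steering allocations toward memory the device can actually address.
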
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c new file mode 100644 index 000000000000..d9c9bee45222 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c @@ -0,0 +1,94 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/bar.h> +#include <subdev/fb.h> + +#include <nvif/class.h> +#include <nvif/if900b.h> +#include <nvif/if900d.h> +#include <nvif/unpack.h> + +int +gf100_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + struct gf100_vmm_map_v0 uvmm = {}; + union { + struct gf100_mem_map_vn vn; + struct gf100_mem_map_v0 v0; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + uvmm.ro = args->v0.ro; + uvmm.kind = args->v0.kind; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + } else + return ret; + + ret = nvkm_vmm_get(bar, nvkm_memory_page(memory), + nvkm_memory_size(memory), pvma); + if (ret) + return ret; + + ret = nvkm_memory_map(memory, 0, bar, *pvma, &uvmm, sizeof(uvmm)); + if (ret) + return ret; + + *paddr = device->func->resource_addr(device, 1) + (*pvma)->addr; + *psize = (*pvma)->size; + return 0; +} + +int +gf100_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct gf100_mem_vn vn; + struct gf100_mem_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool contig; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + contig = args->v0.contig; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + contig = false; + } else + return ret; + + if (mmu->type[type].type & (NVKM_MEM_DISP | NVKM_MEM_COMP)) + type = NVKM_RAM_MM_NORMAL; + else + type = NVKM_RAM_MM_MIXED; + + return nvkm_ram_get(mmu->subdev.device, type, 0x01, page, + size, contig, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c new file mode 100644 index 000000000000..79a3b0cc9f5b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c @@ -0,0 +1,69 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/fb.h> + +#include <nvif/if000b.h> +#include <nvif/unpack.h> + +int +nv04_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + union { + struct nv04_mem_map_vn vn; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + const u64 addr = nvkm_memory_addr(memory); + int ret = -ENOSYS; + + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + return ret; + + *paddr = device->func->resource_addr(device, 1) + addr; + *psize = nvkm_memory_size(memory); + *pvma = ERR_PTR(-ENODEV); + return 0; +} + +int +nv04_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct nv04_mem_vn vn; + } *args = argv; + int ret = -ENOSYS; + + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + return ret; + + if (mmu->type[type].type & NVKM_MEM_MAPPABLE) + type = NVKM_RAM_MM_NORMAL; + else + type = NVKM_RAM_MM_NOMAP; + + return nvkm_ram_get(mmu->subdev.device, type, 0x01, page, + size, true, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c new file mode 100644 index 000000000000..46759b89fc1f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c @@ -0,0 +1,88 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/bar.h> +#include <subdev/fb.h> + +#include <nvif/class.h> +#include <nvif/if500b.h> +#include <nvif/if500d.h> +#include <nvif/unpack.h> + +int +nv50_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + struct nv50_vmm_map_v0 uvmm = {}; + union { + struct nv50_mem_map_vn vn; + struct nv50_mem_map_v0 v0; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); + u64 size = nvkm_memory_size(memory); + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + uvmm.ro = args->v0.ro; + uvmm.kind = args->v0.kind; + uvmm.comp = args->v0.comp; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + } else + return ret; + + ret = nvkm_vmm_get(bar, 12, size, pvma); + if (ret) + return ret; + + *paddr = device->func->resource_addr(device, 1) + (*pvma)->addr; + *psize = (*pvma)->size; + return nvkm_memory_map(memory, 0, bar, *pvma, &uvmm, sizeof(uvmm)); +} + +int +nv50_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct nv50_mem_vn vn; + struct nv50_mem_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool contig; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + type = args->v0.bankswz ? 0x02 : 0x01; + contig = args->v0.contig; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + type = 0x01; + contig = false; + } else + return -ENOSYS; + + return nvkm_ram_get(mmu->subdev.device, NVKM_RAM_MM_NORMAL, type, + page, size, contig, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c index 37927c3fdc3e..d201c887c2cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c @@ -21,129 +21,21 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> - -#define NV04_PDMA_SIZE (128 * 1024 * 1024) -#define NV04_PDMA_PAGE ( 4 * 1024) - -/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv04_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - pte = 0x00008 + (pte * 4); - nvkm_kmap(pgt); - while (cnt) { - u32 page = PAGE_SIZE / NV04_PDMA_PAGE; - u32 phys = (u32)*list++; - while (cnt && page--) { - nvkm_wo32(pgt, pte, phys | 3); - phys += NV04_PDMA_PAGE; - pte += 4; - cnt -= 1; - } - } - nvkm_done(pgt); -} - -static void -nv04_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte = 0x00008 + (pte * 4); - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte, 0x00000000); - pte += 4; - } - nvkm_done(pgt); -} - -static void -nv04_vm_flush(struct nvkm_vm *vm) -{ -} - -/******************************************************************************* - * MMU subdev - 
******************************************************************************/ - -static int -nv04_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *dma; - int ret; - - ret = nvkm_vm_create(&mmu->base, 0, NV04_PDMA_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV04_PDMA_SIZE / NV04_PDMA_PAGE) * 4 + 8, - 16, true, &dma); - mmu->vm->pgt[0].mem[0] = dma; - mmu->vm->pgt[0].refcount[0] = 1; - if (ret) - return ret; - - nvkm_kmap(dma); - nvkm_wo32(dma, 0x00000, 0x0002103d); /* PCI, RW, PT, !LN */ - nvkm_wo32(dma, 0x00004, NV04_PDMA_SIZE - 1); - nvkm_done(dma); - return 0; -} - -void * -nv04_mmu_dtor(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - if (mmu->vm) { - nvkm_memory_del(&mmu->vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &mmu->vm, NULL); - } - if (mmu->nullp) { - dma_free_coherent(device->dev, 16 * 1024, - mmu->nullp, mmu->null); - } - return mmu; -} - -int -nv04_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device, - int index, struct nvkm_mmu **pmmu) -{ - struct nv04_mmu *mmu; - if (!(mmu = kzalloc(sizeof(*mmu), GFP_KERNEL))) - return -ENOMEM; - *pmmu = &mmu->base; - nvkm_mmu_ctor(func, device, index, &mmu->base); - return 0; -} +#include <nvif/class.h> const struct nvkm_mmu_func nv04_mmu = { - .oneinit = nv04_mmu_oneinit, - .dtor = nv04_mmu_dtor, - .limit = NV04_PDMA_SIZE, .dma_bits = 32, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv04_vm_map_sg, - .unmap = nv04_vm_unmap, - .flush = nv04_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv04_vmm_new, true }, }; int nv04_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) { - return nv04_mmu_new_(&nv04_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv04_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h deleted file mode 100644 index 363e33b296d5..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __NV04_MMU_PRIV__ -#define __NV04_MMU_PRIV__ -#define nv04_mmu(p) container_of((p), struct nv04_mmu, base) -#include "priv.h" - -struct nv04_mmu { - struct nvkm_mmu base; - struct nvkm_vm *vm; - dma_addr_t null; - void *nullp; -}; - -int nv04_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, - int index, struct nvkm_mmu **); -void *nv04_mmu_dtor(struct nvkm_mmu *); - -extern const struct nvkm_mmu_func nv04_mmu; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c index c6a26f907009..adca81895c09 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c @@ -21,113 +21,29 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> #include <core/option.h> -#include <subdev/timer.h> -#define NV41_GART_SIZE (512 * 1024 * 1024) -#define NV41_GART_PAGE ( 4 * 1024) - -/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv41_vm_map_sg(struct nvkm_vma 
*vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - pte = pte * 4; - nvkm_kmap(pgt); - while (cnt) { - u32 page = PAGE_SIZE / NV41_GART_PAGE; - u64 phys = (u64)*list++; - while (cnt && page--) { - nvkm_wo32(pgt, pte, (phys >> 7) | 1); - phys += NV41_GART_PAGE; - pte += 4; - cnt -= 1; - } - } - nvkm_done(pgt); -} - -static void -nv41_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte = pte * 4; - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte, 0x00000000); - pte += 4; - } - nvkm_done(pgt); -} - -static void -nv41_vm_flush(struct nvkm_vm *vm) -{ - struct nv04_mmu *mmu = nv04_mmu(vm->mmu); - struct nvkm_device *device = mmu->base.subdev.device; - - mutex_lock(&mmu->base.subdev.mutex); - nvkm_wr32(device, 0x100810, 0x00000022); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100810) & 0x00000020) - break; - ); - nvkm_wr32(device, 0x100810, 0x00000000); - mutex_unlock(&mmu->base.subdev.mutex); -} - -/******************************************************************************* - * MMU subdev - ******************************************************************************/ - -static int -nv41_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - int ret; - - ret = nvkm_vm_create(&mmu->base, 0, NV41_GART_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV41_GART_SIZE / NV41_GART_PAGE) * 4, 16, true, - &mmu->vm->pgt[0].mem[0]); - mmu->vm->pgt[0].refcount[0] = 1; - return ret; -} +#include <nvif/class.h> static void -nv41_mmu_init(struct nvkm_mmu *base) +nv41_mmu_init(struct nvkm_mmu *mmu) { - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *dma = mmu->vm->pgt[0].mem[0]; - nvkm_wr32(device, 0x100800, 0x00000002 | nvkm_memory_addr(dma)); + struct nvkm_device *device = mmu->subdev.device; + nvkm_wr32(device, 0x100800, 0x00000002 | mmu->vmm->pd->pt[0]->addr); nvkm_mask(device, 0x10008c, 0x00000100, 0x00000100); nvkm_wr32(device, 0x100820, 0x00000000); } static const struct nvkm_mmu_func nv41_mmu = { - .dtor = nv04_mmu_dtor, - .oneinit = nv41_mmu_oneinit, .init = nv41_mmu_init, - .limit = NV41_GART_SIZE, .dma_bits = 39, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv41_vm_map_sg, - .unmap = nv41_vm_unmap, - .flush = nv41_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv41_vmm_new, true }, }; int @@ -137,5 +53,5 @@ nv41_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) !nvkm_boolopt(device->cfgopt, "NvPCIE", true)) return nv04_mmu_new(device, index, pmmu); - return nv04_mmu_new_(&nv41_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv41_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c index a648c2395545..598c53a27bde 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c @@ -21,176 +21,18 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> #include <core/option.h> -#include <subdev/timer.h> -#define NV44_GART_SIZE (512 * 1024 * 1024) -#define NV44_GART_PAGE ( 4 * 1024) - 
-/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv44_vm_fill(struct nvkm_memory *pgt, dma_addr_t null, - dma_addr_t *list, u32 pte, u32 cnt) -{ - u32 base = (pte << 2) & ~0x0000000f; - u32 tmp[4]; - - tmp[0] = nvkm_ro32(pgt, base + 0x0); - tmp[1] = nvkm_ro32(pgt, base + 0x4); - tmp[2] = nvkm_ro32(pgt, base + 0x8); - tmp[3] = nvkm_ro32(pgt, base + 0xc); - - while (cnt--) { - u32 addr = list ? (*list++ >> 12) : (null >> 12); - switch (pte++ & 0x3) { - case 0: - tmp[0] &= ~0x07ffffff; - tmp[0] |= addr; - break; - case 1: - tmp[0] &= ~0xf8000000; - tmp[0] |= addr << 27; - tmp[1] &= ~0x003fffff; - tmp[1] |= addr >> 5; - break; - case 2: - tmp[1] &= ~0xffc00000; - tmp[1] |= addr << 22; - tmp[2] &= ~0x0001ffff; - tmp[2] |= addr >> 10; - break; - case 3: - tmp[2] &= ~0xfffe0000; - tmp[2] |= addr << 17; - tmp[3] &= ~0x00000fff; - tmp[3] |= addr >> 15; - break; - } - } - - nvkm_wo32(pgt, base + 0x0, tmp[0]); - nvkm_wo32(pgt, base + 0x4, tmp[1]); - nvkm_wo32(pgt, base + 0x8, tmp[2]); - nvkm_wo32(pgt, base + 0xc, tmp[3] | 0x40000000); -} - -static void -nv44_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - struct nv04_mmu *mmu = nv04_mmu(vma->vm->mmu); - u32 tmp[4]; - int i; - - nvkm_kmap(pgt); - if (pte & 3) { - u32 max = 4 - (pte & 3); - u32 part = (cnt > max) ? max : cnt; - nv44_vm_fill(pgt, mmu->null, list, pte, part); - pte += part; - list += part; - cnt -= part; - } - - while (cnt >= 4) { - for (i = 0; i < 4; i++) - tmp[i] = *list++ >> 12; - nvkm_wo32(pgt, pte++ * 4, tmp[0] >> 0 | tmp[1] << 27); - nvkm_wo32(pgt, pte++ * 4, tmp[1] >> 5 | tmp[2] << 22); - nvkm_wo32(pgt, pte++ * 4, tmp[2] >> 10 | tmp[3] << 17); - nvkm_wo32(pgt, pte++ * 4, tmp[3] >> 15 | 0x40000000); - cnt -= 4; - } - - if (cnt) - nv44_vm_fill(pgt, mmu->null, list, pte, cnt); - nvkm_done(pgt); -} - -static void -nv44_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - struct nv04_mmu *mmu = nv04_mmu(vma->vm->mmu); - - nvkm_kmap(pgt); - if (pte & 3) { - u32 max = 4 - (pte & 3); - u32 part = (cnt > max) ? 
max : cnt; - nv44_vm_fill(pgt, mmu->null, NULL, pte, part); - pte += part; - cnt -= part; - } - - while (cnt >= 4) { - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - cnt -= 4; - } - - if (cnt) - nv44_vm_fill(pgt, mmu->null, NULL, pte, cnt); - nvkm_done(pgt); -} - -static void -nv44_vm_flush(struct nvkm_vm *vm) -{ - struct nv04_mmu *mmu = nv04_mmu(vm->mmu); - struct nvkm_device *device = mmu->base.subdev.device; - nvkm_wr32(device, 0x100814, mmu->base.limit - NV44_GART_PAGE); - nvkm_wr32(device, 0x100808, 0x00000020); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100808) & 0x00000001) - break; - ); - nvkm_wr32(device, 0x100808, 0x00000000); -} - -/******************************************************************************* - * MMU subdev - ******************************************************************************/ - -static int -nv44_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - int ret; - - mmu->nullp = dma_alloc_coherent(device->dev, 16 * 1024, - &mmu->null, GFP_KERNEL); - if (!mmu->nullp) { - nvkm_warn(&mmu->base.subdev, "unable to allocate dummy pages\n"); - mmu->null = 0; - } - - ret = nvkm_vm_create(&mmu->base, 0, NV44_GART_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV44_GART_SIZE / NV44_GART_PAGE) * 4, - 512 * 1024, true, - &mmu->vm->pgt[0].mem[0]); - mmu->vm->pgt[0].refcount[0] = 1; - return ret; -} +#include <nvif/class.h> static void -nv44_mmu_init(struct nvkm_mmu *base) +nv44_mmu_init(struct nvkm_mmu *mmu) { - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *gart = mmu->vm->pgt[0].mem[0]; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_memory *pt = mmu->vmm->pd->pt[0]->memory; u32 addr; /* calculate vram address of this PRAMIN block, object must be @@ -198,11 +40,11 @@ nv44_mmu_init(struct nvkm_mmu *base) * of 512KiB for this to work correctly */ addr = nvkm_rd32(device, 0x10020c); - addr -= ((nvkm_memory_addr(gart) >> 19) + 1) << 19; + addr -= ((nvkm_memory_addr(pt) >> 19) + 1) << 19; nvkm_wr32(device, 0x100850, 0x80000000); - nvkm_wr32(device, 0x100818, mmu->null); - nvkm_wr32(device, 0x100804, NV44_GART_SIZE); + nvkm_wr32(device, 0x100818, mmu->vmm->null); + nvkm_wr32(device, 0x100804, (nvkm_memory_size(pt) / 4) * 4096); nvkm_wr32(device, 0x100850, 0x00008000); nvkm_mask(device, 0x10008c, 0x00000200, 0x00000200); nvkm_wr32(device, 0x100820, 0x00000000); @@ -212,17 +54,11 @@ nv44_mmu_init(struct nvkm_mmu *base) static const struct nvkm_mmu_func nv44_mmu = { - .dtor = nv04_mmu_dtor, - .oneinit = nv44_mmu_oneinit, .init = nv44_mmu_init, - .limit = NV44_GART_SIZE, .dma_bits = 39, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv44_vm_map_sg, - .unmap = nv44_vm_unmap, - .flush = nv44_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv44_vmm_new, true }, }; int @@ -232,5 +68,5 @@ nv44_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) !nvkm_boolopt(device->cfgopt, "NvPCIE", true)) return nv04_mmu_new(device, index, pmmu); - return nv04_mmu_new_(&nv44_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv44_mmu, device, index, pmmu); } diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c index a1f8d65f0276..db3dfbbb2aa0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c @@ -21,207 +21,52 @@ * * Authors: Ben Skeggs */ -#include "priv.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> -#include <subdev/fb.h> -#include <subdev/timer.h> -#include <engine/gr.h> +#include <nvif/class.h> -static void -nv50_vm_map_pgt(struct nvkm_gpuobj *pgd, u32 pde, struct nvkm_memory *pgt[2]) +const u8 * +nv50_mmu_kind(struct nvkm_mmu *base, int *count) { - u64 phys = 0xdeadcafe00000000ULL; - u32 coverage = 0; - - if (pgt[0]) { - /* present, 4KiB pages */ - phys = 0x00000003 | nvkm_memory_addr(pgt[0]); - coverage = (nvkm_memory_size(pgt[0]) >> 3) << 12; - } else - if (pgt[1]) { - /* present, 64KiB pages */ - phys = 0x00000001 | nvkm_memory_addr(pgt[1]); - coverage = (nvkm_memory_size(pgt[1]) >> 3) << 16; - } - - if (phys & 1) { - if (coverage <= 32 * 1024 * 1024) - phys |= 0x60; - else if (coverage <= 64 * 1024 * 1024) - phys |= 0x40; - else if (coverage <= 128 * 1024 * 1024) - phys |= 0x20; - } - - nvkm_kmap(pgd); - nvkm_wo32(pgd, (pde * 8) + 0, lower_32_bits(phys)); - nvkm_wo32(pgd, (pde * 8) + 4, upper_32_bits(phys)); - nvkm_done(pgd); -} - -static inline u64 -vm_addr(struct nvkm_vma *vma, u64 phys, u32 memtype, u32 target) -{ - phys |= 1; /* present */ - phys |= (u64)memtype << 40; - phys |= target << 4; - if (vma->access & NV_MEM_ACCESS_SYS) - phys |= (1 << 6); - if (!(vma->access & NV_MEM_ACCESS_WO)) - phys |= (1 << 3); - return phys; -} - -static void -nv50_vm_map(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta) -{ - struct nvkm_ram *ram = vma->vm->mmu->subdev.device->fb->ram; - u32 comp = (mem->memtype & 0x180) >> 7; - u32 block, target; - int i; - - /* IGPs don't have real VRAM, re-target to stolen system memory */ - target = 0; - if (ram->stolen) { - phys += ram->stolen; - target = 3; - } - - phys = vm_addr(vma, phys, mem->memtype, target); - pte <<= 3; - cnt <<= 3; - - nvkm_kmap(pgt); - while (cnt) { - u32 offset_h = upper_32_bits(phys); - u32 offset_l = lower_32_bits(phys); - - for (i = 7; i >= 0; i--) { - block = 1 << (i + 3); - if (cnt >= block && !(pte & (block - 1))) - break; - } - offset_l |= (i << 7); - - phys += block << (vma->node->type - 3); - cnt -= block; - if (comp) { - u32 tag = mem->tag->offset + ((delta >> 16) * comp); - offset_h |= (tag << 17); - delta += block << (vma->node->type - 3); - } - - while (block) { - nvkm_wo32(pgt, pte + 0, offset_l); - nvkm_wo32(pgt, pte + 4, offset_h); - pte += 8; - block -= 8; - } - } - nvkm_done(pgt); -} - -static void -nv50_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 
3 : 2; - pte <<= 3; - nvkm_kmap(pgt); - while (cnt--) { - u64 phys = vm_addr(vma, (u64)*list++, mem->memtype, target); - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - pte += 8; - } - nvkm_done(pgt); -} - -static void -nv50_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte <<= 3; - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte + 0, 0x00000000); - nvkm_wo32(pgt, pte + 4, 0x00000000); - pte += 8; - } - nvkm_done(pgt); -} - -static void -nv50_vm_flush(struct nvkm_vm *vm) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_subdev *subdev = &mmu->subdev; - struct nvkm_device *device = subdev->device; - int i, vme; - - mutex_lock(&subdev->mutex); - for (i = 0; i < NVKM_SUBDEV_NR; i++) { - if (!atomic_read(&vm->engref[i])) - continue; - - /* unfortunate hw bug workaround... */ - if (i == NVKM_ENGINE_GR && device->gr) { - int ret = nvkm_gr_tlb_flush(device->gr); - if (ret != -ENODEV) - continue; - } - - switch (i) { - case NVKM_ENGINE_GR : vme = 0x00; break; - case NVKM_ENGINE_VP : - case NVKM_ENGINE_MSPDEC: vme = 0x01; break; - case NVKM_SUBDEV_BAR : vme = 0x06; break; - case NVKM_ENGINE_MSPPP : - case NVKM_ENGINE_MPEG : vme = 0x08; break; - case NVKM_ENGINE_BSP : - case NVKM_ENGINE_MSVLD : vme = 0x09; break; - case NVKM_ENGINE_CIPHER: - case NVKM_ENGINE_SEC : vme = 0x0a; break; - case NVKM_ENGINE_CE0 : vme = 0x0d; break; - default: - continue; - } - - nvkm_wr32(device, 0x100c80, (vme << 16) | 1); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) - break; - ) < 0) - nvkm_error(subdev, "vm flush timeout: engine %d\n", vme); - } - mutex_unlock(&subdev->mutex); -} - -static int -nv50_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) -{ - u32 block = (1 << (mmu->func->pgt_bits + 12)); - if (block > length) - block = length; - - return nvkm_vm_create(mmu, offset, length, mm_offset, block, key, pvm); + /* 0x01: no bank swizzle + * 0x02: bank swizzled + * 0x7f: invalid + * + * 0x01/0x02 are values understood by the VRAM allocator, + * and are required to avoid mixing the two types within + * a certain range. 
+ */ + static const u8 + kind[128] = { + 0x01, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x00 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x10 */ + 0x02, 0x02, 0x02, 0x02, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7f, /* 0x20 */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x30 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, /* 0x40 */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x01, 0x01, 0x01, 0x7f, /* 0x50 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7f, /* 0x60 */ + 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, + 0x01, 0x7f, 0x02, 0x7f, 0x01, 0x7f, 0x02, 0x7f, /* 0x70 */ + 0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x7f, 0x7f + }; + *count = ARRAY_SIZE(kind); + return kind; } static const struct nvkm_mmu_func nv50_mmu = { - .limit = (1ULL << 40), .dma_bits = 40, - .pgt_bits = 29 - 12, - .spg_shift = 12, - .lpg_shift = 16, - .create = nv50_vm_create, - .map_pgt = nv50_vm_map_pgt, - .map = nv50_vm_map, - .map_sg = nv50_vm_map_sg, - .unmap = nv50_vm_unmap, - .flush = nv50_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, nv50_vmm_new, false, 0x1400 }, + .kind = nv50_mmu_kind, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h index 27cedc60b507..d024d8055fcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h @@ -9,31 +9,57 @@ int nvkm_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, int index, struct nvkm_mmu **); struct nvkm_mmu_func { - void *(*dtor)(struct nvkm_mmu *); - int (*oneinit)(struct nvkm_mmu *); void (*init)(struct nvkm_mmu *); - u64 limit; u8 dma_bits; - u32 pgt_bits; - u8 spg_shift; - u8 lpg_shift; - - int (*create)(struct nvkm_mmu *, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *, struct nvkm_vm **); - - void (*map_pgt)(struct nvkm_gpuobj *pgd, u32 pde, - struct nvkm_memory *pgt[2]); - void (*map)(struct nvkm_vma *, struct nvkm_memory *, - struct nvkm_mem *, u32 pte, u32 cnt, - u64 phys, u64 delta); - void (*map_sg)(struct nvkm_vma *, struct nvkm_memory *, - struct nvkm_mem *, u32 pte, u32 cnt, dma_addr_t *); - void (*unmap)(struct nvkm_vma *, struct nvkm_memory *pgt, - u32 pte, u32 cnt); - void (*flush)(struct nvkm_vm *); + + struct { + struct nvkm_sclass user; + } mmu; + + struct { + struct nvkm_sclass user; + int (*vram)(struct nvkm_mmu *, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **); + int (*umap)(struct nvkm_mmu *, struct nvkm_memory *, void *argv, + u32 argc, u64 *addr, u64 *size, struct nvkm_vma **); + } mem; + + struct { + struct nvkm_sclass user; + int (*ctor)(struct nvkm_mmu *, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *, + const char *name, struct nvkm_vmm **); + bool global; + u32 pd_offset; + } vmm; + + const u8 *(*kind)(struct nvkm_mmu *, int *count); + bool kind_sys; +}; + +extern const struct nvkm_mmu_func nv04_mmu; + +const u8 *nv50_mmu_kind(struct nvkm_mmu *, int *count); + +const u8 *gf100_mmu_kind(struct nvkm_mmu *, int *count); + +const u8 *gm200_mmu_kind(struct nvkm_mmu *, int *); + +struct nvkm_mmu_pt { + union { + struct nvkm_mmu_ptc *ptc; + 
struct nvkm_mmu_ptp *ptp; + }; + struct nvkm_memory *memory; + bool sub; + u16 base; + u64 addr; + struct list_head head; }; -int nvkm_vm_create(struct nvkm_mmu *, u64, u64, u64, u32, - struct lock_class_key *, struct nvkm_vm **); +void nvkm_mmu_ptc_dump(struct nvkm_mmu *); +struct nvkm_mmu_pt * +nvkm_mmu_ptc_get(struct nvkm_mmu *, u32 size, u32 align, bool zero); +void nvkm_mmu_ptc_put(struct nvkm_mmu *, bool force, struct nvkm_mmu_pt **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c new file mode 100644 index 000000000000..fac2f9a45ea6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c @@ -0,0 +1,192 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "umem.h" +#include "ummu.h" + +#include <core/client.h> +#include <core/memory.h> +#include <subdev/bar.h> + +#include <nvif/class.h> +#include <nvif/if000a.h> +#include <nvif/unpack.h> + +static const struct nvkm_object_func nvkm_umem; +struct nvkm_memory * +nvkm_umem_search(struct nvkm_client *client, u64 handle) +{ + struct nvkm_client *master = client->object.client; + struct nvkm_memory *memory = NULL; + struct nvkm_object *object; + struct nvkm_umem *umem; + + object = nvkm_object_search(client, handle, &nvkm_umem); + if (IS_ERR(object)) { + if (client->super && client != master) { + spin_lock(&master->lock); + list_for_each_entry(umem, &master->umem, head) { + if (umem->object.object == handle) { + memory = nvkm_memory_ref(umem->memory); + break; + } + } + spin_unlock(&master->lock); + } + } else { + umem = nvkm_umem(object); + if (!umem->priv || client->super) + memory = nvkm_memory_ref(umem->memory); + } + + return memory ? 
memory : ERR_PTR(-ENOENT); +} + +static int +nvkm_umem_unmap(struct nvkm_object *object) +{ + struct nvkm_umem *umem = nvkm_umem(object); + + if (!umem->map) + return -EEXIST; + + if (umem->io) { + if (!IS_ERR(umem->bar)) { + struct nvkm_device *device = umem->mmu->subdev.device; + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &umem->bar); + } else { + umem->bar = NULL; + } + } else { + vunmap(umem->map); + umem->map = NULL; + } + + return 0; +} + +static int +nvkm_umem_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *handle, u64 *length) +{ + struct nvkm_umem *umem = nvkm_umem(object); + struct nvkm_mmu *mmu = umem->mmu; + + if (!umem->mappable) + return -EINVAL; + if (umem->map) + return -EEXIST; + + if ((umem->type & NVKM_MEM_HOST) && !argc) { + int ret = nvkm_mem_map_host(umem->memory, &umem->map); + if (ret) + return ret; + + *handle = (unsigned long)(void *)umem->map; + *length = nvkm_memory_size(umem->memory); + *type = NVKM_OBJECT_MAP_VA; + return 0; + } else + if ((umem->type & NVKM_MEM_VRAM) || + (umem->type & NVKM_MEM_KIND)) { + int ret = mmu->func->mem.umap(mmu, umem->memory, argv, argc, + handle, length, &umem->bar); + if (ret) + return ret; + + *type = NVKM_OBJECT_MAP_IO; + } else { + return -EINVAL; + } + + umem->io = (*type == NVKM_OBJECT_MAP_IO); + return 0; +} + +static void * +nvkm_umem_dtor(struct nvkm_object *object) +{ + struct nvkm_umem *umem = nvkm_umem(object); + spin_lock(&umem->object.client->lock); + list_del_init(&umem->head); + spin_unlock(&umem->object.client->lock); + nvkm_memory_unref(&umem->memory); + return umem; +} + +static const struct nvkm_object_func +nvkm_umem = { + .dtor = nvkm_umem_dtor, + .map = nvkm_umem_map, + .unmap = nvkm_umem_unmap, +}; + +int +nvkm_umem_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_mmu *mmu = nvkm_ummu(oclass->parent)->mmu; + union { + struct nvif_mem_v0 v0; + } *args = argv; + struct nvkm_umem *umem; + int type, ret = -ENOSYS; + u8 page; + u64 size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + type = args->v0.type; + page = args->v0.page; + size = args->v0.size; + } else + return ret; + + if (type >= mmu->type_nr) + return -EINVAL; + + if (!(umem = kzalloc(sizeof(*umem), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_umem, oclass, &umem->object); + umem->mmu = mmu; + umem->type = mmu->type[type].type; + umem->priv = oclass->client->super; + INIT_LIST_HEAD(&umem->head); + *pobject = &umem->object; + + if (mmu->type[type].type & NVKM_MEM_MAPPABLE) { + page = max_t(u8, page, PAGE_SHIFT); + umem->mappable = true; + } + + ret = nvkm_mem_new_type(mmu, type, page, size, argv, argc, + &umem->memory); + if (ret) + return ret; + + spin_lock(&umem->object.client->lock); + list_add(&umem->head, &umem->object.client->umem); + spin_unlock(&umem->object.client->lock); + + args->v0.page = nvkm_memory_page(umem->memory); + args->v0.addr = nvkm_memory_addr(umem->memory); + args->v0.size = nvkm_memory_size(umem->memory); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h new file mode 100644 index 000000000000..85cf692d620a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h @@ -0,0 +1,26 @@ +#ifndef __NVKM_UMEM_H__ +#define __NVKM_UMEM_H__ +#define nvkm_umem(p) container_of((p), struct nvkm_umem, object) +#include <core/object.h> +#include "mem.h" + +struct nvkm_umem { + struct nvkm_object object; + struct nvkm_mmu 
*mmu; + u8 type:8; + bool priv:1; + bool mappable:1; + bool io:1; + + struct nvkm_memory *memory; + struct list_head head; + + union { + struct nvkm_vma *bar; + void *map; + }; +}; + +int nvkm_umem_new(const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c new file mode 100644 index 000000000000..353f10f92b77 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c @@ -0,0 +1,178 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ummu.h" +#include "umem.h" +#include "uvmm.h" + +#include <core/client.h> + +#include <nvif/if0008.h> +#include <nvif/unpack.h> + +static int +nvkm_ummu_sclass(struct nvkm_object *object, int index, + struct nvkm_oclass *oclass) +{ + struct nvkm_mmu *mmu = nvkm_ummu(object)->mmu; + + if (mmu->func->mem.user.oclass && oclass->client->super) { + if (index-- == 0) { + oclass->base = mmu->func->mem.user; + oclass->ctor = nvkm_umem_new; + return 0; + } + } + + if (mmu->func->vmm.user.oclass) { + if (index-- == 0) { + oclass->base = mmu->func->vmm.user; + oclass->ctor = nvkm_uvmm_new; + return 0; + } + } + + return -EINVAL; +} + +static int +nvkm_ummu_heap(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_heap_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u8 index; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= mmu->heap_nr) + return -EINVAL; + args->v0.size = mmu->heap[index].size; + } else + return ret; + + return 0; +} + +static int +nvkm_ummu_type(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_type_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u8 type, index; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= mmu->type_nr) + return -EINVAL; + type = mmu->type[index].type; + args->v0.heap = mmu->type[index].heap; + args->v0.vram = !!(type & NVKM_MEM_VRAM); + args->v0.host = !!(type & NVKM_MEM_HOST); + args->v0.comp = !!(type & NVKM_MEM_COMP); + args->v0.disp = !!(type & NVKM_MEM_DISP); + args->v0.kind = !!(type & NVKM_MEM_KIND); + args->v0.mappable = !!(type & NVKM_MEM_MAPPABLE); + args->v0.coherent = !!(type & NVKM_MEM_COHERENT); + args->v0.uncached = !!(type & NVKM_MEM_UNCACHED); + } else + 
return ret; + + return 0; +} + +static int +nvkm_ummu_kind(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_kind_v0 v0; + } *args = argv; + const u8 *kind = NULL; + int ret = -ENOSYS, count = 0; + + if (mmu->func->kind) + kind = mmu->func->kind(mmu, &count); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + if (argc != args->v0.count * sizeof(*args->v0.data)) + return -EINVAL; + if (args->v0.count > count) + return -EINVAL; + memcpy(args->v0.data, kind, args->v0.count); + } else + return ret; + + return 0; +} + +static int +nvkm_ummu_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) +{ + struct nvkm_ummu *ummu = nvkm_ummu(object); + switch (mthd) { + case NVIF_MMU_V0_HEAP: return nvkm_ummu_heap(ummu, argv, argc); + case NVIF_MMU_V0_TYPE: return nvkm_ummu_type(ummu, argv, argc); + case NVIF_MMU_V0_KIND: return nvkm_ummu_kind(ummu, argv, argc); + default: + break; + } + return -EINVAL; +} + +static const struct nvkm_object_func +nvkm_ummu = { + .mthd = nvkm_ummu_mthd, + .sclass = nvkm_ummu_sclass, +}; + +int +nvkm_ummu_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + union { + struct nvif_mmu_v0 v0; + } *args = argv; + struct nvkm_mmu *mmu = device->mmu; + struct nvkm_ummu *ummu; + int ret = -ENOSYS, kinds = 0; + + if (mmu->func->kind) + mmu->func->kind(mmu, &kinds); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + args->v0.dmabits = mmu->dma_bits; + args->v0.heap_nr = mmu->heap_nr; + args->v0.type_nr = mmu->type_nr; + args->v0.kind_nr = kinds; + } else + return ret; + + if (!(ummu = kzalloc(sizeof(*ummu), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_ummu, oclass, &ummu->object); + ummu->mmu = mmu; + *pobject = &ummu->object; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h new file mode 100644 index 000000000000..0cd510dcfc68 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h @@ -0,0 +1,14 @@ +#ifndef __NVKM_UMMU_H__ +#define __NVKM_UMMU_H__ +#define nvkm_ummu(p) container_of((p), struct nvkm_ummu, object) +#include <core/object.h> +#include "priv.h" + +struct nvkm_ummu { + struct nvkm_object object; + struct nvkm_mmu *mmu; +}; + +int nvkm_ummu_new(struct nvkm_device *, const struct nvkm_oclass *, + void *argv, u32 argc, struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c new file mode 100644 index 000000000000..fa81d0c1ba41 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -0,0 +1,352 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "uvmm.h" +#include "umem.h" +#include "ummu.h" + +#include <core/client.h> +#include <core/memory.h> + +#include <nvif/if000c.h> +#include <nvif/unpack.h> + +static const struct nvkm_object_func nvkm_uvmm; +struct nvkm_vmm * +nvkm_uvmm_search(struct nvkm_client *client, u64 handle) +{ + struct nvkm_object *object; + + object = nvkm_object_search(client, handle, &nvkm_uvmm); + if (IS_ERR(object)) + return (void *)object; + + return nvkm_uvmm(object)->vmm; +} + +static int +nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_unmap_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + u64 addr; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + } else + return ret; + + mutex_lock(&vmm->mutex); + vma = nvkm_vmm_node_search(vmm, addr); + if (ret = -ENOENT, !vma || vma->addr != addr) { + VMM_DEBUG(vmm, "lookup %016llx: %016llx", + addr, vma ? vma->addr : ~0ULL); + goto done; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto done; + } + + if (ret = -EINVAL, !vma->memory) { + VMM_DEBUG(vmm, "unmapped"); + goto done; + } + + nvkm_vmm_unmap_locked(vmm, vma); + ret = 0; +done: + mutex_unlock(&vmm->mutex); + return ret; +} + +static int +nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_map_v0 v0; + } *args = argv; + u64 addr, size, handle, offset; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + struct nvkm_memory *memory; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + addr = args->v0.addr; + size = args->v0.size; + handle = args->v0.memory; + offset = args->v0.offset; + } else + return ret; + + if (IS_ERR((memory = nvkm_umem_search(client, handle)))) { + VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory)); + return PTR_ERR(memory); + } + + mutex_lock(&vmm->mutex); + if (ret = -ENOENT, !(vma = nvkm_vmm_node_search(vmm, addr))) { + VMM_DEBUG(vmm, "lookup %016llx", addr); + goto fail; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto fail; + } + + if (ret = -EINVAL, vma->addr != addr || vma->size != size) { + if (addr + size > vma->addr + vma->size || vma->memory || + (vma->refd == NVKM_VMA_PAGE_NONE && !vma->mapref)) { + VMM_DEBUG(vmm, "split %d %d %d " + "%016llx %016llx %016llx %016llx", + !!vma->memory, vma->refd, vma->mapref, + addr, size, vma->addr, (u64)vma->size); + goto fail; + } + + if (vma->addr != addr) { + const u64 tail = vma->size + vma->addr - addr; + if (ret = -ENOMEM, !(vma = nvkm_vma_tail(vma, tail))) + goto fail; + vma->part = true; + nvkm_vmm_node_insert(vmm, vma); + } + + 
if (vma->size != size) { + const u64 tail = vma->size - size; + struct nvkm_vma *tmp; + if (ret = -ENOMEM, !(tmp = nvkm_vma_tail(vma, tail))) { + nvkm_vmm_unmap_region(vmm, vma); + goto fail; + } + tmp->part = true; + nvkm_vmm_node_insert(vmm, tmp); + } + } + vma->busy = true; + mutex_unlock(&vmm->mutex); + + ret = nvkm_memory_map(memory, offset, vmm, vma, argv, argc); + if (ret == 0) { + /* Successful map will clear vma->busy. */ + nvkm_memory_unref(&memory); + return 0; + } + + mutex_lock(&vmm->mutex); + vma->busy = false; + nvkm_vmm_unmap_region(vmm, vma); +fail: + mutex_unlock(&vmm->mutex); + nvkm_memory_unref(&memory); + return ret; +} + +static int +nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_put_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + u64 addr; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + } else + return ret; + + mutex_lock(&vmm->mutex); + vma = nvkm_vmm_node_search(vmm, args->v0.addr); + if (ret = -ENOENT, !vma || vma->addr != addr || vma->part) { + VMM_DEBUG(vmm, "lookup %016llx: %016llx %d", addr, + vma ? vma->addr : ~0ULL, vma ? vma->part : 0); + goto done; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto done; + } + + nvkm_vmm_put_locked(vmm, vma); + ret = 0; +done: + mutex_unlock(&vmm->mutex); + return ret; +} + +static int +nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_get_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + bool getref, mapref, sparse; + u8 page, align; + u64 size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + getref = args->v0.type == NVIF_VMM_GET_V0_PTES; + mapref = args->v0.type == NVIF_VMM_GET_V0_ADDR; + sparse = args->v0.sparse; + page = args->v0.page; + align = args->v0.align; + size = args->v0.size; + } else + return ret; + + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_get_locked(vmm, getref, mapref, sparse, + page, align, size, &vma); + mutex_unlock(&vmm->mutex); + if (ret) + return ret; + + args->v0.addr = vma->addr; + vma->user = !client->super; + return ret; +} + +static int +nvkm_uvmm_mthd_page(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + union { + struct nvif_vmm_page_v0 v0; + } *args = argv; + const struct nvkm_vmm_page *page; + int ret = -ENOSYS; + u8 type, index, nr; + + page = uvmm->vmm->func->page; + for (nr = 0; page[nr].shift; nr++); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= nr) + return -EINVAL; + type = page[index].type; + args->v0.shift = page[index].shift; + args->v0.sparse = !!(type & NVKM_VMM_PAGE_SPARSE); + args->v0.vram = !!(type & NVKM_VMM_PAGE_VRAM); + args->v0.host = !!(type & NVKM_VMM_PAGE_HOST); + args->v0.comp = !!(type & NVKM_VMM_PAGE_COMP); + } else + return -ENOSYS; + + return 0; +} + +static int +nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) +{ + struct nvkm_uvmm *uvmm = nvkm_uvmm(object); + switch (mthd) { + case NVIF_VMM_V0_PAGE : return nvkm_uvmm_mthd_page (uvmm, argv, argc); + case NVIF_VMM_V0_GET : return nvkm_uvmm_mthd_get (uvmm, argv, argc); + case NVIF_VMM_V0_PUT : return 
nvkm_uvmm_mthd_put (uvmm, argv, argc); + case NVIF_VMM_V0_MAP : return nvkm_uvmm_mthd_map (uvmm, argv, argc); + case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc); + default: + break; + } + return -EINVAL; +} + +static void * +nvkm_uvmm_dtor(struct nvkm_object *object) +{ + struct nvkm_uvmm *uvmm = nvkm_uvmm(object); + nvkm_vmm_unref(&uvmm->vmm); + return uvmm; +} + +static const struct nvkm_object_func +nvkm_uvmm = { + .dtor = nvkm_uvmm_dtor, + .mthd = nvkm_uvmm_mthd, +}; + +int +nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_mmu *mmu = nvkm_ummu(oclass->parent)->mmu; + const bool more = oclass->base.maxver >= 0; + union { + struct nvif_vmm_v0 v0; + } *args = argv; + const struct nvkm_vmm_page *page; + struct nvkm_uvmm *uvmm; + int ret = -ENOSYS; + u64 addr, size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, more))) { + addr = args->v0.addr; + size = args->v0.size; + } else + return ret; + + if (!(uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_uvmm, oclass, &uvmm->object); + *pobject = &uvmm->object; + + if (!mmu->vmm) { + ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, + NULL, "user", &uvmm->vmm); + if (ret) + return ret; + + uvmm->vmm->debug = max(uvmm->vmm->debug, oclass->client->debug); + } else { + if (size) + return -EINVAL; + + uvmm->vmm = nvkm_vmm_ref(mmu->vmm); + } + + page = uvmm->vmm->func->page; + args->v0.page_nr = 0; + while (page && (page++)->shift) + args->v0.page_nr++; + args->v0.addr = uvmm->vmm->start; + args->v0.size = uvmm->vmm->limit; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h new file mode 100644 index 000000000000..71dab55e18a9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h @@ -0,0 +1,14 @@ +#ifndef __NVKM_UVMM_H__ +#define __NVKM_UVMM_H__ +#define nvkm_uvmm(p) container_of((p), struct nvkm_uvmm, object) +#include <core/object.h> +#include "vmm.h" + +struct nvkm_uvmm { + struct nvkm_object object; + struct nvkm_vmm *vmm; +}; + +int nvkm_uvmm_new(const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c new file mode 100644 index 000000000000..e35d3e17cd7c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -0,0 +1,1513 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define NVKM_VMM_LEVELS_MAX 5 +#include "vmm.h" + +#include <subdev/fb.h> + +static void +nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt) +{ + struct nvkm_vmm_pt *pgt = *ppgt; + if (pgt) { + kvfree(pgt->pde); + kfree(pgt); + *ppgt = NULL; + } +} + + +static struct nvkm_vmm_pt * +nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse, + const struct nvkm_vmm_page *page) +{ + const u32 pten = 1 << desc->bits; + struct nvkm_vmm_pt *pgt; + u32 lpte = 0; + + if (desc->type > PGT) { + if (desc->type == SPT) { + const struct nvkm_vmm_desc *pair = page[-1].desc; + lpte = pten >> (desc->bits - pair->bits); + } else { + lpte = pten; + } + } + + if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL))) + return NULL; + pgt->page = page ? page->shift : 0; + pgt->sparse = sparse; + + if (desc->type == PGD) { + pgt->pde = kvzalloc(sizeof(*pgt->pde) * pten, GFP_KERNEL); + if (!pgt->pde) { + kfree(pgt); + return NULL; + } + } + + return pgt; +} + +struct nvkm_vmm_iter { + const struct nvkm_vmm_page *page; + const struct nvkm_vmm_desc *desc; + struct nvkm_vmm *vmm; + u64 cnt; + u16 max, lvl; + u32 pte[NVKM_VMM_LEVELS_MAX]; + struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX]; + int flush; +}; + +#ifdef CONFIG_NOUVEAU_DEBUG_MMU +static const char * +nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc) +{ + switch (desc->type) { + case PGD: return "PGD"; + case PGT: return "PGT"; + case SPT: return "SPT"; + case LPT: return "LPT"; + default: + return "UNKNOWN"; + } +} + +static void +nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf) +{ + int lvl; + for (lvl = it->max; lvl >= 0; lvl--) { + if (lvl >= it->lvl) + buf += sprintf(buf, "%05x:", it->pte[lvl]); + else + buf += sprintf(buf, "xxxxx:"); + } +} + +#define TRA(i,f,a...) do { \ + char _buf[NVKM_VMM_LEVELS_MAX * 7]; \ + struct nvkm_vmm_iter *_it = (i); \ + nvkm_vmm_trace(_it, _buf); \ + VMM_TRACE(_it->vmm, "%s "f, _buf, ##a); \ +} while(0) +#else +#define TRA(i,f,a...) +#endif + +static inline void +nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it) +{ + it->flush = min(it->flush, it->max - it->lvl); +} + +static inline void +nvkm_vmm_flush(struct nvkm_vmm_iter *it) +{ + if (it->flush != NVKM_VMM_LEVELS_MAX) { + if (it->vmm->func->flush) { + TRA(it, "flush: %d", it->flush); + it->vmm->func->flush(it->vmm, it->flush); + } + it->flush = NVKM_VMM_LEVELS_MAX; + } +} + +static void +nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc[it->lvl].type == SPT; + struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1]; + struct nvkm_vmm_pt *pgt = it->pt[it->lvl]; + struct nvkm_mmu_pt *pt = pgt->pt[type]; + struct nvkm_vmm *vmm = it->vmm; + u32 pdei = it->pte[it->lvl + 1]; + + /* Recurse up the tree, unreferencing/destroying unneeded PDs. */ + it->lvl++; + if (--pgd->refs[0]) { + const struct nvkm_vmm_desc_func *func = desc[it->lvl].func; + /* PD has other valid PDEs, so we need a proper update. */ + TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1])); + pgt->pt[type] = NULL; + if (!pgt->refs[!type]) { + /* PDE no longer required. 
*/
+ if (pgd->pt[0]) {
+ if (pgt->sparse) {
+ func->sparse(vmm, pgd->pt[0], pdei, 1);
+ pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
+ } else {
+ func->unmap(vmm, pgd->pt[0], pdei, 1);
+ pgd->pde[pdei] = NULL;
+ }
+ } else {
+ /* Special handling for Tesla-class GPUs,
+ * where there's no central PD, but each
+ * instance has its own embedded PD.
+ */
+ func->pde(vmm, pgd, pdei);
+ pgd->pde[pdei] = NULL;
+ }
+ } else {
+ /* PDE was pointing at dual-PTs and we're removing
+ * one of them, leaving the other in place.
+ */
+ func->pde(vmm, pgd, pdei);
+ }
+
+ /* GPU may have cached the PTs, flush before freeing. */
+ nvkm_vmm_flush_mark(it);
+ nvkm_vmm_flush(it);
+ } else {
+ /* PD has no valid PDEs left, so we can just destroy it. */
+ nvkm_vmm_unref_pdes(it);
+ }
+
+ /* Destroy PD/PT. */
+ TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
+ nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
+ if (!pgt->refs[!type])
+ nvkm_vmm_pt_del(&pgt);
+ it->lvl--;
+}
+
+static void
+nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
+ const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *pair = it->page[-1].desc;
+ const u32 sptb = desc->bits - pair->bits;
+ const u32 sptn = 1 << sptb;
+ struct nvkm_vmm *vmm = it->vmm;
+ u32 spti = ptei & (sptn - 1), lpti, pteb;
+
+ /* Determine how many SPTEs are being touched under each LPTE,
+ * and drop reference counts.
+ */
+ for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
+ const u32 pten = min(sptn - spti, ptes);
+ pgt->pte[lpti] -= pten;
+ ptes -= pten;
+ }
+
+ /* We're done here if there's no corresponding LPT. */
+ if (!pgt->refs[0])
+ return;
+
+ for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
+ /* Skip over any LPTEs that still have valid SPTEs. */
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+ break;
+ }
+ continue;
+ }
+
+ /* As there are no more non-UNMAPPED SPTEs left in the range
+ * covered by a number of LPTEs, the LPTEs once again take
+ * control over their address range.
+ *
+ * Determine how many LPTEs need to transition state.
+ */
+ pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+ break;
+ pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ }
+
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+ TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
+ pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
+ } else
+ if (pair->func->invalid) {
+ /* If the MMU supports it, restore the LPTE to the
+ * INVALID state to tell the MMU there is no point
+ * trying to fetch the corresponding SPTEs.
+ */
+ TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+ pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+ }
+ }
+}
+
+static bool
+nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc->type == SPT;
+ struct nvkm_vmm_pt *pgt = it->pt[0];
+
+ /* Drop PTE references. */
+ pgt->refs[type] -= ptes;
+
+ /* Dual-PTs need special handling, unless the PDE is becoming invalid. */
+ if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
+ nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
+
+ /* PT no longer needed? Destroy it. */
+ if (!pgt->refs[type]) {
+ it->lvl++;
+ TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
+ it->lvl--;
+ nvkm_vmm_unref_pdes(it);
+ return false; /* PTE writes for unmap() not necessary.
*/ + } + + return true; +} + +static void +nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, + const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *pair = it->page[-1].desc; + const u32 sptb = desc->bits - pair->bits; + const u32 sptn = 1 << sptb; + struct nvkm_vmm *vmm = it->vmm; + u32 spti = ptei & (sptn - 1), lpti, pteb; + + /* Determine how many SPTEs are being touched under each LPTE, + * and increase reference counts. + */ + for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) { + const u32 pten = min(sptn - spti, ptes); + pgt->pte[lpti] += pten; + ptes -= pten; + } + + /* We're done here if there's no corresponding LPT. */ + if (!pgt->refs[0]) + return; + + for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) { + /* Skip over any LPTEs that already have valid SPTEs. */ + if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) { + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID)) + break; + } + continue; + } + + /* As there are now non-UNMAPPED SPTEs in the range covered + * by a number of LPTEs, we need to transfer control of the + * address range to the SPTEs. + * + * Determine how many LPTEs need to transition state. + */ + pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID) + break; + pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + } + + if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) { + const u32 spti = pteb * sptn; + const u32 sptc = ptes * sptn; + /* The entire LPTE is marked as sparse, we need + * to make sure that the SPTEs are too. + */ + TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc); + desc->func->sparse(vmm, pgt->pt[1], spti, sptc); + /* Sparse LPTEs prevent SPTEs from being accessed. */ + TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes); + pair->func->unmap(vmm, pgt->pt[0], pteb, ptes); + } else + if (pair->func->invalid) { + /* MMU supports blocking SPTEs by marking an LPTE + * as INVALID. We need to reverse that here. + */ + TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes); + pair->func->unmap(vmm, pgt->pt[0], pteb, ptes); + } + } +} + +static bool +nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc->type == SPT; + struct nvkm_vmm_pt *pgt = it->pt[0]; + + /* Take PTE references. */ + pgt->refs[type] += ptes; + + /* Dual-PTs need special handling. 
*/ + if (desc->type == SPT) + nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes); + + return true; +} + +static void +nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, + struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes) +{ + if (desc->type == PGD) { + while (ptes--) + pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE; + } else + if (desc->type == LPT) { + memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes); + } +} + +static bool +nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + struct nvkm_vmm_pt *pt = it->pt[0]; + if (it->desc->type == PGD) + memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes); + else + if (it->desc->type == LPT) + memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); + return nvkm_vmm_unref_ptes(it, ptei, ptes); +} + +static bool +nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes); + return nvkm_vmm_ref_ptes(it, ptei, ptes); +} + +static bool +nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1]; + const int type = desc->type == SPT; + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + const bool zero = !pgt->sparse && !desc->func->invalid; + struct nvkm_vmm *vmm = it->vmm; + struct nvkm_mmu *mmu = vmm->mmu; + struct nvkm_mmu_pt *pt; + u32 pten = 1 << desc->bits; + u32 pteb, ptei, ptes; + u32 size = desc->size * pten; + + pgd->refs[0]++; + + pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero); + if (!pgt->pt[type]) { + it->lvl--; + nvkm_vmm_unref_pdes(it); + return false; + } + + if (zero) + goto done; + + pt = pgt->pt[type]; + + if (desc->type == LPT && pgt->refs[1]) { + /* SPT already exists covering the same range as this LPT, + * which means we need to be careful that any LPTEs which + * overlap valid SPTEs are unmapped as opposed to invalid + * or sparse, which would prevent the MMU from looking at + * the SPTEs on some GPUs. 
+ */ + for (ptei = pteb = 0; ptei < pten; pteb = ptei) { + bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) { + bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + if (spte != next) + break; + } + + if (!spte) { + if (pgt->sparse) + desc->func->sparse(vmm, pt, pteb, ptes); + else + desc->func->invalid(vmm, pt, pteb, ptes); + memset(&pgt->pte[pteb], 0x00, ptes); + } else { + desc->func->unmap(vmm, pt, pteb, ptes); + while (ptes--) + pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID; + } + } + } else { + if (pgt->sparse) { + nvkm_vmm_sparse_ptes(desc, pgt, 0, pten); + desc->func->sparse(vmm, pt, 0, pten); + } else { + desc->func->invalid(vmm, pt, 0, pten); + } + } + +done: + TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc)); + it->desc[it->lvl].func->pde(it->vmm, pgd, pdei); + nvkm_vmm_flush_mark(it); + return true; +} + +static bool +nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1]; + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + + pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page); + if (!pgt) { + if (!pgd->refs[0]) + nvkm_vmm_unref_pdes(it); + return false; + } + + pgd->pde[pdei] = pgt; + return true; +} + +static inline u64 +nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, const char *name, bool ref, + bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32), + nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map, + nvkm_vmm_pxe_func CLR_PTES) +{ + const struct nvkm_vmm_desc *desc = page->desc; + struct nvkm_vmm_iter it; + u64 bits = addr >> page->shift; + + it.page = page; + it.desc = desc; + it.vmm = vmm; + it.cnt = size >> page->shift; + it.flush = NVKM_VMM_LEVELS_MAX; + + /* Deconstruct address into PTE indices for each mapping level. */ + for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) { + it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1); + bits >>= desc[it.lvl].bits; + } + it.max = --it.lvl; + it.pt[it.max] = vmm->pd; + + it.lvl = 0; + TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name, + addr, size, page->shift, it.cnt); + it.lvl = it.max; + + /* Depth-first traversal of page tables. */ + while (it.cnt) { + struct nvkm_vmm_pt *pgt = it.pt[it.lvl]; + const int type = desc->type == SPT; + const u32 pten = 1 << desc->bits; + const u32 ptei = it.pte[0]; + const u32 ptes = min_t(u64, it.cnt, pten - ptei); + + /* Walk down the tree, finding page tables for each level. */ + for (; it.lvl; it.lvl--) { + const u32 pdei = it.pte[it.lvl]; + struct nvkm_vmm_pt *pgd = pgt; + + /* Software PT. */ + if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) { + if (!nvkm_vmm_ref_swpt(&it, pgd, pdei)) + goto fail; + } + it.pt[it.lvl - 1] = pgt = pgd->pde[pdei]; + + /* Hardware PT. + * + * This is a separate step from above due to GF100 and + * newer having dual page tables at some levels, which + * are refcounted independently. + */ + if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) { + if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei)) + goto fail; + } + } + + /* Handle PTE updates. */ + if (!REF_PTES || REF_PTES(&it, ptei, ptes)) { + struct nvkm_mmu_pt *pt = pgt->pt[type]; + if (MAP_PTES || CLR_PTES) { + if (MAP_PTES) + MAP_PTES(vmm, pt, ptei, ptes, map); + else + CLR_PTES(vmm, pt, ptei, ptes); + nvkm_vmm_flush_mark(&it); + } + } + + /* Walk back up the tree to the next position. 
*/
+ it.pte[it.lvl] += ptes;
+ it.cnt -= ptes;
+ if (it.cnt) {
+ while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
+ it.pte[it.lvl++] = 0;
+ it.pte[it.lvl]++;
+ }
+ }
+ }
+
+ nvkm_vmm_flush(&it);
+ return ~0ULL;
+
+fail:
+ /* Reconstruct the failure address so the caller is able to
+ * reverse any partially completed operations.
+ */
+ addr = it.pte[it.max--];
+ do {
+ addr = addr << desc[it.max].bits;
+ addr |= it.pte[it.max];
+ } while (it.max--);
+
+ return addr << page->shift;
+}
+
+static void
+nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size)
+{
+ nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false,
+ nvkm_vmm_sparse_unref_ptes, NULL, NULL,
+ page->desc->func->invalid ?
+ page->desc->func->invalid : page->desc->func->unmap);
+}
+
+static int
+nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size)
+{
+ if ((page->type & NVKM_VMM_PAGE_SPARSE)) {
+ u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref",
+ true, nvkm_vmm_sparse_ref_ptes, NULL,
+ NULL, page->desc->func->sparse);
+ if (fail != ~0ULL) {
+ if ((size = fail - addr))
+ nvkm_vmm_ptes_sparse_put(vmm, page, addr, size);
+ return -ENOMEM;
+ }
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int
+nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
+{
+ const struct nvkm_vmm_page *page = vmm->func->page;
+ int m = 0, i;
+ u64 start = addr;
+ u64 block;
+
+ while (size) {
+ /* Limit maximum page size based on remaining size. */
+ while (size < (1ULL << page[m].shift))
+ m++;
+ i = m;
+
+ /* Find largest page size suitable for alignment. */
+ while (!IS_ALIGNED(addr, 1ULL << page[i].shift))
+ i++;
+
+ /* Determine number of PTEs at this page size. */
+ if (i != m) {
+ /* Limited to alignment boundary of next page size. */
+ u64 next = 1ULL << page[i - 1].shift;
+ u64 part = ALIGN(addr, next) - addr;
+ if (size - part >= next)
+ block = (part >> page[i].shift) << page[i].shift;
+ else
+ block = (size >> page[i].shift) << page[i].shift;
+ } else {
+ block = (size >> page[i].shift) << page[i].shift;
+ }
+
+ /* Perform operation. */
+ if (ref) {
+ int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block);
+ if (ret) {
+ if ((size = addr - start))
+ nvkm_vmm_ptes_sparse(vmm, start, size, false);
+ return ret;
+ }
+ } else {
+ nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block);
+ }
+
+ size -= block;
+ addr += block;
+ }
+
+ return 0;
+}
+
+static void
+nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, bool sparse)
+{
+ const struct nvkm_vmm_desc_func *func = page->desc->func;
+ nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref",
+ false, nvkm_vmm_unref_ptes, NULL, NULL,
+ sparse ? func->sparse : func->invalid ? func->invalid :
+ func->unmap);
+}
+
+static int
+nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, struct nvkm_vmm_map *map,
+ nvkm_vmm_pte_func func)
+{
+ u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
+ nvkm_vmm_ref_ptes, func, map, NULL);
+ if (fail != ~0ULL) {
+ if ((size = fail - addr))
+ nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, bool sparse)
+{
+ const struct nvkm_vmm_desc_func *func = page->desc->func;
+ nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
+ sparse ?
func->sparse : func->invalid ? func->invalid : + func->unmap); +} + +static void +nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, struct nvkm_vmm_map *map, + nvkm_vmm_pte_func func) +{ + nvkm_vmm_iter(vmm, page, addr, size, "map", false, + NULL, func, map, NULL); +} + +static void +nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + nvkm_vmm_iter(vmm, page, addr, size, "unref", false, + nvkm_vmm_unref_ptes, NULL, NULL, NULL); +} + +static int +nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, + nvkm_vmm_ref_ptes, NULL, NULL, NULL); + if (fail != ~0ULL) { + if (fail != addr) + nvkm_vmm_ptes_put(vmm, page, addr, fail - addr); + return -ENOMEM; + } + return 0; +} + +static inline struct nvkm_vma * +nvkm_vma_new(u64 addr, u64 size) +{ + struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (vma) { + vma->addr = addr; + vma->size = size; + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + } + return vma; +} + +struct nvkm_vma * +nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) +{ + struct nvkm_vma *new; + + BUG_ON(vma->size == tail); + + if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail))) + return NULL; + vma->size -= tail; + + new->mapref = vma->mapref; + new->sparse = vma->sparse; + new->page = vma->page; + new->refd = vma->refd; + new->used = vma->used; + new->part = vma->part; + new->user = vma->user; + new->busy = vma->busy; + list_add(&new->head, &vma->head); + return new; +} + +static void +nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->free.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->size < this->size) + ptr = &parent->rb_left; + else + if (vma->size > this->size) + ptr = &parent->rb_right; + else + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->free); +} + +void +nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->root.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->root); +} + +struct nvkm_vma * +nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr) +{ + struct rb_node *node = vmm->root.rb_node; + while (node) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + if (addr < vma->addr) + node = node->rb_left; + else + if (addr >= vma->addr + vma->size) + node = node->rb_right; + else + return vma; + } + return NULL; +} + +static void +nvkm_vmm_dtor(struct nvkm_vmm *vmm) +{ + struct nvkm_vma *vma; + struct rb_node *node; + + while ((node = rb_first(&vmm->root))) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + nvkm_vmm_put(vmm, &vma); + } + + if (vmm->bootstrapped) { + const struct nvkm_vmm_page *page = vmm->func->page; + const u64 limit = vmm->limit - vmm->start; + + while (page[1].shift) + page++; + + nvkm_mmu_ptc_dump(vmm->mmu); + 
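+ /* Drop the PTE references taken across the whole address space
+ * by nvkm_vmm_boot() at the smallest supported page size.
+ */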
nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
+ }
+
+ vma = list_first_entry(&vmm->list, typeof(*vma), head);
+ list_del(&vma->head);
+ kfree(vma);
+ WARN_ON(!list_empty(&vmm->list));
+
+ if (vmm->nullp) {
+ dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
+ vmm->nullp, vmm->null);
+ }
+
+ if (vmm->pd) {
+ nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]);
+ nvkm_vmm_pt_del(&vmm->pd);
+ }
+}
+
+int
+nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
+ u32 pd_header, u64 addr, u64 size, struct lock_class_key *key,
+ const char *name, struct nvkm_vmm *vmm)
+{
+ static struct lock_class_key _key;
+ const struct nvkm_vmm_page *page = func->page;
+ const struct nvkm_vmm_desc *desc;
+ struct nvkm_vma *vma;
+ int levels, bits = 0;
+
+ vmm->func = func;
+ vmm->mmu = mmu;
+ vmm->name = name;
+ vmm->debug = mmu->subdev.debug;
+ kref_init(&vmm->kref);
+
+ __mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);
+
+ /* Locate the smallest page size supported by the backend; it will
+ * have the deepest nesting of page tables.
+ */
+ while (page[1].shift)
+ page++;
+
+ /* Locate the structure that describes the layout of the top-level
+ * page table, and determine the number of valid bits in a virtual
+ * address.
+ */
+ for (levels = 0, desc = page->desc; desc->bits; desc++, levels++)
+ bits += desc->bits;
+ bits += page->shift;
+ desc--;
+
+ if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX))
+ return -EINVAL;
+
+ vmm->start = addr;
+ vmm->limit = size ? (addr + size) : (1ULL << bits);
+ if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits))
+ return -EINVAL;
+
+ /* Allocate top-level page table. */
+ vmm->pd = nvkm_vmm_pt_new(desc, false, NULL);
+ if (!vmm->pd)
+ return -ENOMEM;
+ vmm->pd->refs[0] = 1;
+ INIT_LIST_HEAD(&vmm->join);
+
+ /* ... and the GPU storage for it, except on Tesla-class GPUs that
+ * have the PD embedded in the instance structure.
+ */
+ if (desc->size) {
+ const u32 size = pd_header + desc->size * (1 << desc->bits);
+ vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true);
+ if (!vmm->pd->pt[0])
+ return -ENOMEM;
+ }
+
+ /* Initialise address-space MM. */
+ INIT_LIST_HEAD(&vmm->list);
+ vmm->free = RB_ROOT;
+ vmm->root = RB_ROOT;
+
+ if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start)))
+ return -ENOMEM;
+
+ nvkm_vmm_free_insert(vmm, vma);
+ list_add(&vma->head, &vmm->list);
+ return 0;
+}
+
+int
+nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
+ u32 hdr, u64 addr, u64 size, struct lock_class_key *key,
+ const char *name, struct nvkm_vmm **pvmm)
+{
+ if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL)))
+ return -ENOMEM;
+ return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
+}
+
+#define node(root, dir) ((root)->head.dir == &vmm->list) ?
NULL : \ + list_entry((root)->head.dir, struct nvkm_vma, head) + +void +nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *next; + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + + if (vma->part) { + struct nvkm_vma *prev = node(vma, prev); + if (!prev->memory) { + prev->size += vma->size; + rb_erase(&vma->tree, &vmm->root); + list_del(&vma->head); + kfree(vma); + vma = prev; + } + } + + next = node(vma, next); + if (next && next->part) { + if (!next->memory) { + vma->size += next->size; + rb_erase(&next->tree, &vmm->root); + list_del(&next->head); + kfree(next); + } + } +} + +void +nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd]; + + if (vma->mapref) { + nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse); + vma->refd = NVKM_VMA_PAGE_NONE; + } else { + nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse); + } + + nvkm_vmm_unmap_region(vmm, vma); +} + +void +nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + if (vma->memory) { + mutex_lock(&vmm->mutex); + nvkm_vmm_unmap_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + } +} + +static int +nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + switch (nvkm_memory_target(map->memory)) { + case NVKM_MEM_TARGET_VRAM: + if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) { + VMM_DEBUG(vmm, "%d !VRAM", map->page->shift); + return -EINVAL; + } + break; + case NVKM_MEM_TARGET_HOST: + case NVKM_MEM_TARGET_NCOH: + if (!(map->page->type & NVKM_VMM_PAGE_HOST)) { + VMM_DEBUG(vmm, "%d !HOST", map->page->shift); + return -EINVAL; + } + break; + default: + WARN_ON(1); + return -ENOSYS; + } + + if (!IS_ALIGNED( vma->addr, 1ULL << map->page->shift) || + !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) || + !IS_ALIGNED( map->offset, 1ULL << map->page->shift) || + nvkm_memory_page(map->memory) < map->page->shift) { + VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d", + vma->addr, (u64)vma->size, map->offset, map->page->shift, + nvkm_memory_page(map->memory)); + return -EINVAL; + } + + return vmm->func->valid(vmm, argv, argc, map); +} + +static int +nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + for (map->page = vmm->func->page; map->page->shift; map->page++) { + VMM_DEBUG(vmm, "trying %d", map->page->shift); + if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map)) + return 0; + } + return -EINVAL; +} + +static int +nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + nvkm_vmm_pte_func func; + int ret; + + /* Make sure we won't overrun the end of the memory object. */ + if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) { + VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx", + nvkm_memory_size(map->memory), + map->offset, (u64)vma->size); + return -EINVAL; + } + + /* Check remaining arguments for validity. */ + if (vma->page == NVKM_VMA_PAGE_NONE && + vma->refd == NVKM_VMA_PAGE_NONE) { + /* Find the largest page size we can perform the mapping at. 
*/ + const u32 debug = vmm->debug; + vmm->debug = 0; + ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + vmm->debug = debug; + if (ret) { + VMM_DEBUG(vmm, "invalid at any page size"); + nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + return -EINVAL; + } + } else { + /* Page size of the VMA is already pre-determined. */ + if (vma->refd != NVKM_VMA_PAGE_NONE) + map->page = &vmm->func->page[vma->refd]; + else + map->page = &vmm->func->page[vma->page]; + + ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map); + if (ret) { + VMM_DEBUG(vmm, "invalid %d\n", ret); + return ret; + } + } + + /* Deal with the 'offset' argument, and fetch the backend function. */ + map->off = map->offset; + if (map->mem) { + for (; map->off; map->mem = map->mem->next) { + u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT; + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->mem; + } else + if (map->sgl) { + for (; map->off; map->sgl = sg_next(map->sgl)) { + u64 size = sg_dma_len(map->sgl); + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->sgl; + } else { + map->dma += map->offset >> PAGE_SHIFT; + map->off = map->offset & PAGE_MASK; + func = map->page->desc->func->dma; + } + + /* Perform the map. */ + if (vma->refd == NVKM_VMA_PAGE_NONE) { + ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func); + if (ret) + return ret; + + vma->refd = map->page - vmm->func->page; + } else { + nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func); + } + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + vma->memory = nvkm_memory_ref(map->memory); + vma->tags = map->tags; + return 0; +} + +int +nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + int ret; + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map); + vma->busy = false; + mutex_unlock(&vmm->mutex); + return ret; +} + +static void +nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *prev, *next; + + if ((prev = node(vma, prev)) && !prev->used) { + rb_erase(&prev->tree, &vmm->free); + list_del(&prev->head); + vma->addr = prev->addr; + vma->size += prev->size; + kfree(prev); + } + + if ((next = node(vma, next)) && !next->used) { + rb_erase(&next->tree, &vmm->free); + list_del(&next->head); + vma->size += next->size; + kfree(next); + } + + nvkm_vmm_free_insert(vmm, vma); +} + +void +nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + struct nvkm_vma *next = vma; + + BUG_ON(vma->part); + + if (vma->mapref || !vma->sparse) { + do { + const bool map = next->memory != NULL; + const u8 refd = next->refd; + const u64 addr = next->addr; + u64 size = next->size; + + /* Merge regions that are in the same state. */ + while ((next = node(next, next)) && next->part && + (next->memory != NULL) == map && + (next->refd == refd)) + size += next->size; + + if (map) { + /* Region(s) are mapped, merge the unmap + * and dereference into a single walk of + * the page tree. + */ + nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr, + size, vma->sparse); + } else + if (refd != NVKM_VMA_PAGE_NONE) { + /* Drop allocation-time PTE references. 
*/ + nvkm_vmm_ptes_put(vmm, &page[refd], addr, size); + } + } while (next && next->part); + } + + /* Merge any mapped regions that were split from the initial + * address-space allocation back into the allocated VMA, and + * release memory/compression resources. + */ + next = vma; + do { + if (next->memory) + nvkm_vmm_unmap_region(vmm, next); + } while ((next = node(vma, next)) && next->part); + + if (vma->sparse && !vma->mapref) { + /* Sparse region that was allocated with a fixed page size, + * meaning all relevant PTEs were referenced once when the + * region was allocated, and remained that way, regardless + * of whether memory was mapped into it afterwards. + * + * The process of unmapping, unsparsing, and dereferencing + * PTEs can be done in a single page tree walk. + */ + nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size); + } else + if (vma->sparse) { + /* Sparse region that wasn't allocated with a fixed page size, + * PTE references were taken both at allocation time (to make + * the GPU see the region as sparse), and when mapping memory + * into the region. + * + * The latter was handled above, and the remaining references + * are dealt with here. + */ + nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false); + } + + /* Remove VMA from the list of allocated nodes. */ + rb_erase(&vma->tree, &vmm->root); + + /* Merge VMA back into the free list. */ + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + vma->used = false; + vma->user = false; + nvkm_vmm_put_region(vmm, vma); +} + +void +nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma) +{ + struct nvkm_vma *vma = *pvma; + if (vma) { + mutex_lock(&vmm->mutex); + nvkm_vmm_put_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + *pvma = NULL; + } +} + +int +nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse, + u8 shift, u8 align, u64 size, struct nvkm_vma **pvma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE]; + struct rb_node *node = NULL, *temp; + struct nvkm_vma *vma = NULL, *tmp; + u64 addr, tail; + int ret; + + VMM_TRACE(vmm, "getref %d mapref %d sparse %d " + "shift: %d align: %d size: %016llx", + getref, mapref, sparse, shift, align, size); + + /* Zero-sized, or lazily-allocated sparse VMAs, make no sense. */ + if (unlikely(!size || (!getref && !mapref && sparse))) { + VMM_DEBUG(vmm, "args %016llx %d %d %d", + size, getref, mapref, sparse); + return -EINVAL; + } + + /* Tesla-class GPUs can only select page size per-PDE, which means + * we're required to know the mapping granularity up-front to find + * a suitable region of address-space. + * + * The same goes if we're requesting up-front allocation of PTES. + */ + if (unlikely((getref || vmm->func->page_block) && !shift)) { + VMM_DEBUG(vmm, "page size required: %d %016llx", + getref, vmm->func->page_block); + return -EINVAL; + } + + /* If a specific page size was requested, determine its index and + * make sure the requested size is a multiple of the page size. + */ + if (shift) { + for (page = vmm->func->page; page->shift; page++) { + if (shift == page->shift) + break; + } + + if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) { + VMM_DEBUG(vmm, "page %d %016llx", shift, size); + return -EINVAL; + } + align = max_t(u8, align, shift); + } else { + align = max_t(u8, align, 12); + } + + /* Locate smallest block that can possibly satisfy the allocation. 
*/
+ temp = vmm->free.rb_node;
+ while (temp) {
+ struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree);
+ if (this->size < size) {
+ temp = temp->rb_right;
+ } else {
+ node = temp;
+ temp = temp->rb_left;
+ }
+ }
+
+ if (unlikely(!node))
+ return -ENOSPC;
+
+ /* Take into account alignment restrictions, trying larger blocks
+ * in turn until we find a suitable free block.
+ */
+ do {
+ struct nvkm_vma *this = rb_entry(node, typeof(*this), tree);
+ struct nvkm_vma *prev = node(this, prev);
+ struct nvkm_vma *next = node(this, next);
+ const int p = page - vmm->func->page;
+
+ addr = this->addr;
+ if (vmm->func->page_block && prev && prev->page != p)
+ addr = ALIGN(addr, vmm->func->page_block);
+ addr = ALIGN(addr, 1ULL << align);
+
+ tail = this->addr + this->size;
+ if (vmm->func->page_block && next && next->page != p)
+ tail = ALIGN_DOWN(tail, vmm->func->page_block);
+
+ if (addr <= tail && tail - addr >= size) {
+ rb_erase(&this->tree, &vmm->free);
+ vma = this;
+ break;
+ }
+ } while ((node = rb_next(node)));
+
+ if (unlikely(!vma))
+ return -ENOSPC;
+
+ /* If the VMA we found isn't already exactly the requested size,
+ * it needs to be split, and the remaining free blocks returned.
+ */
+ if (addr != vma->addr) {
+ if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) {
+ nvkm_vmm_put_region(vmm, vma);
+ return -ENOMEM;
+ }
+ nvkm_vmm_free_insert(vmm, vma);
+ vma = tmp;
+ }
+
+ if (size != vma->size) {
+ if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
+ nvkm_vmm_put_region(vmm, vma);
+ return -ENOMEM;
+ }
+ nvkm_vmm_free_insert(vmm, tmp);
+ }
+
+ /* Pre-allocate page tables and/or setup sparse mappings. */
+ if (sparse && getref)
+ ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size);
+ else if (sparse)
+ ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true);
+ else if (getref)
+ ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size);
+ else
+ ret = 0;
+ if (ret) {
+ nvkm_vmm_put_region(vmm, vma);
+ return ret;
+ }
+
+ vma->mapref = mapref && !getref;
+ vma->sparse = sparse;
+ vma->page = page - vmm->func->page;
+ vma->refd = getref ?
vma->page : NVKM_VMA_PAGE_NONE;
+ vma->used = true;
+ nvkm_vmm_node_insert(vmm, vma);
+ *pvma = vma;
+ return 0;
+}
+
+int
+nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma)
+{
+ int ret;
+ mutex_lock(&vmm->mutex);
+ ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma);
+ mutex_unlock(&vmm->mutex);
+ return ret;
+}
+
+void
+nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
+{
+ if (vmm->func->part && inst) {
+ mutex_lock(&vmm->mutex);
+ vmm->func->part(vmm, inst);
+ mutex_unlock(&vmm->mutex);
+ }
+}
+
+int
+nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
+{
+ int ret = 0;
+ if (vmm->func->join) {
+ mutex_lock(&vmm->mutex);
+ ret = vmm->func->join(vmm, inst);
+ mutex_unlock(&vmm->mutex);
+ }
+ return ret;
+}
+
+static bool
+nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc->type == SPT;
+ nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
+ return false;
+}
+
+int
+nvkm_vmm_boot(struct nvkm_vmm *vmm)
+{
+ const struct nvkm_vmm_page *page = vmm->func->page;
+ const u64 limit = vmm->limit - vmm->start;
+ int ret;
+
+ while (page[1].shift)
+ page++;
+
+ ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
+ if (ret)
+ return ret;
+
+ nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
+ nvkm_vmm_boot_ptes, NULL, NULL, NULL);
+ vmm->bootstrapped = true;
+ return 0;
+}
+
+static void
+nvkm_vmm_del(struct kref *kref)
+{
+ struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref);
+ nvkm_vmm_dtor(vmm);
+ kfree(vmm);
+}
+
+void
+nvkm_vmm_unref(struct nvkm_vmm **pvmm)
+{
+ struct nvkm_vmm *vmm = *pvmm;
+ if (vmm) {
+ kref_put(&vmm->kref, nvkm_vmm_del);
+ *pvmm = NULL;
+ }
+}
+
+struct nvkm_vmm *
+nvkm_vmm_ref(struct nvkm_vmm *vmm)
+{
+ if (vmm)
+ kref_get(&vmm->kref);
+ return vmm;
+}
+
+int
+nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv,
+ u32 argc, struct lock_class_key *key, const char *name,
+ struct nvkm_vmm **pvmm)
+{
+ struct nvkm_mmu *mmu = device->mmu;
+ struct nvkm_vmm *vmm = NULL;
+ int ret;
+ ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm);
+ if (ret)
+ nvkm_vmm_unref(&vmm);
+ *pvmm = vmm;
+ return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
new file mode 100644
index 000000000000..6d8f61ea467a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -0,0 +1,310 @@
+#ifndef __NVKM_VMM_H__
+#define __NVKM_VMM_H__
+#include "priv.h"
+#include <core/memory.h>
+enum nvkm_memory_target;
+
+struct nvkm_vmm_pt {
+ /* Some GPUs have a mapping level with dual page tables to
+ * support large and small pages in the same address-range.
+ *
+ * We track the state of both page tables in one place, which
+ * is why there are multiple PT pointers/refcounts here.
+ */
+ struct nvkm_mmu_pt *pt[2];
+ u32 refs[2];
+
+ /* Page size handled by this PT.
+ *
+ * Tesla backend needs to know this when writing PDEs,
+ * otherwise unnecessary.
+ */
+ u8 page;
+
+ /* Entire page table sparse.
+ *
+ * Used to propagate sparseness to child page tables.
+ */
+ bool sparse:1;
+
+ /* Tracking for page directories.
+ *
+ * The array is indexed by PDE, and will either point to the
+ * child page table, or indicate the PDE is marked as sparse.
+ **/ +#define NVKM_VMM_PDE_INVALID(pde) IS_ERR_OR_NULL(pde) +#define NVKM_VMM_PDE_SPARSED(pde) IS_ERR(pde) +#define NVKM_VMM_PDE_SPARSE ERR_PTR(-EBUSY) + struct nvkm_vmm_pt **pde; + + /* Tracking for dual page tables. + * + * There's one entry for each LPTE, keeping track of whether + * there are valid SPTEs in the same address-range. + * + * This information is used to manage LPTE state transitions. + */ +#define NVKM_VMM_PTE_SPARSE 0x80 +#define NVKM_VMM_PTE_VALID 0x40 +#define NVKM_VMM_PTE_SPTES 0x3f + u8 pte[]; +}; + +typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *, + struct nvkm_mmu_pt *, u32 ptei, u32 ptes); +typedef void (*nvkm_vmm_pde_func)(struct nvkm_vmm *, + struct nvkm_vmm_pt *, u32 pdei); +typedef void (*nvkm_vmm_pte_func)(struct nvkm_vmm *, struct nvkm_mmu_pt *, + u32 ptei, u32 ptes, struct nvkm_vmm_map *); + +struct nvkm_vmm_desc_func { + nvkm_vmm_pxe_func invalid; + nvkm_vmm_pxe_func unmap; + nvkm_vmm_pxe_func sparse; + + nvkm_vmm_pde_func pde; + + nvkm_vmm_pte_func mem; + nvkm_vmm_pte_func dma; + nvkm_vmm_pte_func sgl; +}; + +extern const struct nvkm_vmm_desc_func gf100_vmm_pgd; +void gf100_vmm_pgd_pde(struct nvkm_vmm *, struct nvkm_vmm_pt *, u32); +extern const struct nvkm_vmm_desc_func gf100_vmm_pgt; +void gf100_vmm_pgt_unmap(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32); +void gf100_vmm_pgt_mem(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); +void gf100_vmm_pgt_dma(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); +void gf100_vmm_pgt_sgl(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); + +void gk104_vmm_lpt_invalid(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32); + +struct nvkm_vmm_desc { + enum { + PGD, + PGT, + SPT, + LPT, + } type; + u8 bits; /* VMA bits covered by PT. */ + u8 size; /* Bytes-per-PTE. */ + u32 align; /* PT address alignment. 
*/ + const struct nvkm_vmm_desc_func *func; +}; + +extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_17_17[]; + +extern const struct nvkm_vmm_desc gm200_vmm_desc_16_12[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_16_16[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_17_12[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_17_17[]; + +extern const struct nvkm_vmm_desc gp100_vmm_desc_12[]; +extern const struct nvkm_vmm_desc gp100_vmm_desc_16[]; + +struct nvkm_vmm_page { + u8 shift; + const struct nvkm_vmm_desc *desc; +#define NVKM_VMM_PAGE_SPARSE 0x01 +#define NVKM_VMM_PAGE_VRAM 0x02 +#define NVKM_VMM_PAGE_HOST 0x04 +#define NVKM_VMM_PAGE_COMP 0x08 +#define NVKM_VMM_PAGE_Sxxx (NVKM_VMM_PAGE_SPARSE) +#define NVKM_VMM_PAGE_xVxx (NVKM_VMM_PAGE_VRAM) +#define NVKM_VMM_PAGE_SVxx (NVKM_VMM_PAGE_Sxxx | NVKM_VMM_PAGE_VRAM) +#define NVKM_VMM_PAGE_xxHx (NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_SxHx (NVKM_VMM_PAGE_Sxxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_xVHx (NVKM_VMM_PAGE_xVxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_SVHx (NVKM_VMM_PAGE_SVxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_xVxC (NVKM_VMM_PAGE_xVxx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_SVxC (NVKM_VMM_PAGE_SVxx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_xxHC (NVKM_VMM_PAGE_xxHx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_SxHC (NVKM_VMM_PAGE_SxHx | NVKM_VMM_PAGE_COMP) + u8 type; +}; + +struct nvkm_vmm_func { + int (*join)(struct nvkm_vmm *, struct nvkm_memory *inst); + void (*part)(struct nvkm_vmm *, struct nvkm_memory *inst); + + int (*aper)(enum nvkm_memory_target); + int (*valid)(struct nvkm_vmm *, void *argv, u32 argc, + struct nvkm_vmm_map *); + void (*flush)(struct nvkm_vmm *, int depth); + + u64 page_block; + const struct nvkm_vmm_page page[]; +}; + +struct nvkm_vmm_join { + struct nvkm_memory *inst; + struct list_head head; +}; + +int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, + u32 pd_header, u64 addr, u64 size, struct lock_class_key *, + const char *name, struct nvkm_vmm **); +int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *, + u32 pd_header, u64 addr, u64 size, struct lock_class_key *, + const char *name, struct nvkm_vmm *); +struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr); +int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref, + bool sparse, u8 page, u8 align, u64 size, + struct nvkm_vma **pvma); +void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); + +struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); +void nvkm_vmm_node_insert(struct nvkm_vmm *, struct nvkm_vma *); + +int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, + u64, u64, void *, u32, struct lock_class_key *, + const char *, struct nvkm_vmm **); +int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); + +int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gf100_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); +int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +void gf100_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); +int 
gf100_vmm_aper(enum nvkm_memory_target); +int gf100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void gf100_vmm_flush_(struct nvkm_vmm *, int); +void gf100_vmm_flush(struct nvkm_vmm *, int); + +int gk20a_vmm_aper(enum nvkm_memory_target); + +int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); +int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); + +int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void gp100_vmm_flush(struct nvkm_vmm *, int); + +int nv04_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv41_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gk104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gk20a_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gm200_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm200_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm20b_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm20b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); + +#define VMM_PRINT(l,v,p,f,a...) do { \ + struct nvkm_vmm *_vmm = (v); \ + if (CONFIG_NOUVEAU_DEBUG >= (l) && _vmm->debug >= (l)) { \ + nvkm_printk_(&_vmm->mmu->subdev, 0, p, "%s: "f"\n", \ + _vmm->name, ##a); \ + } \ +} while(0) +#define VMM_DEBUG(v,f,a...) VMM_PRINT(NV_DBG_DEBUG, (v), info, f, ##a) +#define VMM_TRACE(v,f,a...) VMM_PRINT(NV_DBG_TRACE, (v), info, f, ##a) +#define VMM_SPAM(v,f,a...) 
VMM_PRINT(NV_DBG_SPAM , (v), dbg, f, ##a) + +#define VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL,BASE,SIZE,NEXT) do { \ + nvkm_kmap((PT)->memory); \ + while (PTEN) { \ + u64 _ptes = ((SIZE) - MAP->off) >> MAP->page->shift; \ + u64 _addr = ((BASE) + MAP->off); \ + \ + if (_ptes > PTEN) { \ + MAP->off += PTEN << MAP->page->shift; \ + _ptes = PTEN; \ + } else { \ + MAP->off = 0; \ + NEXT; \ + } \ + \ + VMM_SPAM(VMM, "ITER %08x %08x PTE(s)", PTEI, (u32)_ptes); \ + \ + FILL(VMM, PT, PTEI, _ptes, MAP, _addr); \ + PTEI += _ptes; \ + PTEN -= _ptes; \ + }; \ + nvkm_done((PT)->memory); \ +} while(0) + +#define VMM_MAP_ITER_MEM(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + ((u64)MAP->mem->offset << NVKM_RAM_MM_SHIFT), \ + ((u64)MAP->mem->length << NVKM_RAM_MM_SHIFT), \ + (MAP->mem = MAP->mem->next)) +#define VMM_MAP_ITER_DMA(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + *MAP->dma, PAGE_SIZE, MAP->dma++) +#define VMM_MAP_ITER_SGL(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + sg_dma_address(MAP->sgl), sg_dma_len(MAP->sgl), \ + (MAP->sgl = sg_next(MAP->sgl))) + +#define VMM_FO(m,o,d,c,b) nvkm_fo##b((m)->memory, (o), (d), (c)) +#define VMM_WO(m,o,d,c,b) nvkm_wo##b((m)->memory, (o), (d)) +#define VMM_XO(m,v,o,d,c,b,fn,f,a...) do { \ + const u32 _pteo = (o); u##b _data = (d); \ + VMM_SPAM((v), " %010llx "f, (m)->addr + _pteo, _data, ##a); \ + VMM_##fn((m), (m)->base + _pteo, _data, (c), b); \ +} while(0) + +#define VMM_WO032(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 32, WO, "%08x") +#define VMM_FO032(m,v,o,d,c) \ + VMM_XO((m),(v),(o),(d),(c), 32, FO, "%08x %08x", (c)) + +#define VMM_WO064(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 64, WO, "%016llx") +#define VMM_FO064(m,v,o,d,c) \ + VMM_XO((m),(v),(o),(d),(c), 64, FO, "%016llx %08x", (c)) + +#define VMM_XO128(m,v,o,lo,hi,c,f,a...) do { \ + u32 _pteo = (o), _ptes = (c); \ + const u64 _addr = (m)->addr + _pteo; \ + VMM_SPAM((v), " %010llx %016llx%016llx"f, _addr, (hi), (lo), ##a); \ + while (_ptes--) { \ + nvkm_wo64((m)->memory, (m)->base + _pteo + 0, (lo)); \ + nvkm_wo64((m)->memory, (m)->base + _pteo + 8, (hi)); \ + _pteo += 0x10; \ + } \ +} while(0) + +#define VMM_WO128(m,v,o,lo,hi) VMM_XO128((m),(v),(o),(lo),(hi), 1, "") +#define VMM_FO128(m,v,o,lo,hi,c) do { \ + nvkm_kmap((m)->memory); \ + VMM_XO128((m),(v),(o),(lo),(hi),(c), " %08x", (c)); \ + nvkm_done((m)->memory); \ +} while(0) +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c new file mode 100644 index 000000000000..faf5a7e9265e --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -0,0 +1,403 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/ltc.h> +#include <subdev/timer.h> + +#include <nvif/if900d.h> +#include <nvif/unpack.h> + +static inline void +gf100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 base = (addr >> 8) | map->type; + u64 data = base; + + if (map->ctag && !(map->next & (1ULL << 44))) { + while (ptes--) { + data = base | ((map->ctag >> 1) << 44); + if (!(map->ctag++ & 1)) + data |= BIT_ULL(60); + + VMM_WO064(pt, vmm, ptei++ * 8, data); + base += map->next; + } + } else { + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO064(pt, vmm, ptei++ * 8, data); + data += map->next; + } + } +} + +void +gf100_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = (*map->dma++ >> 8) | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO064(pt, vmm, ptei * 8, 0ULL, ptes); +} + +const struct nvkm_vmm_desc_func +gf100_vmm_pgt = { + .unmap = gf100_vmm_pgt_unmap, + .mem = gf100_vmm_pgt_mem, + .dma = gf100_vmm_pgt_dma, + .sgl = gf100_vmm_pgt_sgl, +}; + +void +gf100_vmm_pgd_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + struct nvkm_mmu_pt *pt; + u64 data = 0; + + if ((pt = pgt->pt[0])) { + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 1ULL << 0; break; + case NVKM_MEM_TARGET_HOST: data |= 2ULL << 0; + data |= BIT_ULL(35); /* VOL */ + break; + case NVKM_MEM_TARGET_NCOH: data |= 3ULL << 0; break; + default: + WARN_ON(1); + return; + } + data |= pt->addr >> 8; + } + + if ((pt = pgt->pt[1])) { + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 1ULL << 32; break; + case NVKM_MEM_TARGET_HOST: data |= 2ULL << 32; + data |= BIT_ULL(34); /* VOL */ + break; + case NVKM_MEM_TARGET_NCOH: data |= 3ULL << 32; break; + default: + WARN_ON(1); + return; + } + data |= pt->addr << 24; + } + + nvkm_kmap(pd->memory); + VMM_WO064(pd, vmm, pdei * 8, data); + nvkm_done(pd->memory); +} + +const struct nvkm_vmm_desc_func +gf100_vmm_pgd = { + .unmap = gf100_vmm_pgt_unmap, + .pde = gf100_vmm_pgd_pde, +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_17_17[] = { + { LPT, 10, 8, 
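+	  /* nvkm_vmm_desc fields, in order: level type, index bits,
+	   * bytes per PTE, table alignment, per-level functions; so this
+	   * large-page table level has 1 << 10 entries of 8 bytes each,
+	   * aligned to 0x1000.
+	   */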
0x1000, &gf100_vmm_pgt },
+	{ PGD, 13, 8, 0x1000, &gf100_vmm_pgd },
+	{}
+};
+
+static const struct nvkm_vmm_desc
+gf100_vmm_desc_16_12[] = {
+	{ SPT, 14, 8, 0x1000, &gf100_vmm_pgt },
+	{ PGD, 14, 8, 0x1000, &gf100_vmm_pgd },
+	{}
+};
+
+static const struct nvkm_vmm_desc
+gf100_vmm_desc_16_16[] = {
+	{ LPT, 10, 8, 0x1000, &gf100_vmm_pgt },
+	{ PGD, 14, 8, 0x1000, &gf100_vmm_pgd },
+	{}
+};
+
+void
+gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth)
+{
+	struct nvkm_subdev *subdev = &vmm->mmu->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 type = depth << 24;
+
+	type = 0x00000001; /* PAGE_ALL */
+	if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR]))
+		type |= 0x00000004; /* HUB_ONLY */
+
+	mutex_lock(&subdev->mutex);
+	/* Looks like maybe a "free flush slots" counter, the
+	 * faster you write to 0x100cbc the more it decreases.
+	 */
+	nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x100c80) & 0x00ff0000)
+			break;
+	);
+
+	nvkm_wr32(device, 0x100cb8, vmm->pd->pt[0]->addr >> 8);
+	nvkm_wr32(device, 0x100cbc, 0x80000000 | type);
+
+	/* Wait for flush to be queued? */
+	nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x100c80) & 0x00008000)
+			break;
+	);
+	mutex_unlock(&subdev->mutex);
+}
+
+void
+gf100_vmm_flush(struct nvkm_vmm *vmm, int depth)
+{
+	gf100_vmm_flush_(vmm, 0);
+}
+
+int
+gf100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
+		struct nvkm_vmm_map *map)
+{
+	const enum nvkm_memory_target target = nvkm_memory_target(map->memory);
+	const struct nvkm_vmm_page *page = map->page;
+	const bool gm20x = page->desc->func->sparse != NULL;
+	union {
+		struct gf100_vmm_map_vn vn;
+		struct gf100_vmm_map_v0 v0;
+	} *args = argv;
+	struct nvkm_device *device = vmm->mmu->subdev.device;
+	struct nvkm_memory *memory = map->memory;
+	u8 kind, priv, ro, vol;
+	int kindn, aper, ret = -ENOSYS;
+	const u8 *kindm;
+
+	map->next = (1 << page->shift) >> 8;
+	map->type = map->ctag = 0;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+		vol  = !!args->v0.vol;
+		ro   = !!args->v0.ro;
+		priv = !!args->v0.priv;
+		kind =   args->v0.kind;
+	} else
+	if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) {
+		vol  = target == NVKM_MEM_TARGET_HOST;
+		ro   = 0;
+		priv = 0;
+		kind = 0x00;
+	} else {
+		VMM_DEBUG(vmm, "args");
+		return ret;
+	}
+
+	aper = vmm->func->aper(target);
+	if (WARN_ON(aper < 0))
+		return aper;
+
+	kindm = vmm->mmu->func->kind(vmm->mmu, &kindn);
+	if (kind >= kindn || kindm[kind] == 0xff) {
+		VMM_DEBUG(vmm, "kind %02x", kind);
+		return -EINVAL;
+	}
+
+	if (kindm[kind] != kind) {
+		u32 comp = (page->shift == 16 && !gm20x) ?
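+			/* i.e. one comptag per 64KiB on pre-GM20x
+			 * 64KiB-big-page layouts, otherwise one per 128KiB;
+			 * see the tag count computed just below.
+			 */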
16 : 17; + u32 tags = ALIGN(nvkm_memory_size(memory), 1 << 17) >> comp; + if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, + nvkm_ltc_tags_clear, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, "comp %d", ret); + return ret; + } + + if (map->tags->mn) { + u64 tags = map->tags->mn->offset + (map->offset >> 17); + if (page->shift == 17 || !gm20x) { + map->type |= tags << 44; + map->ctag |= 1ULL << 44; + map->next |= 1ULL << 44; + } else { + map->ctag |= tags << 1 | 1; + } + } else { + kind = kindm[kind]; + } + } + + map->type |= BIT(0); + map->type |= (u64)priv << 1; + map->type |= (u64) ro << 2; + map->type |= (u64) vol << 32; + map->type |= (u64)aper << 33; + map->type |= (u64)kind << 36; + return 0; +} + +int +gf100_vmm_aper(enum nvkm_memory_target target) +{ + switch (target) { + case NVKM_MEM_TARGET_VRAM: return 0; + case NVKM_MEM_TARGET_HOST: return 2; + case NVKM_MEM_TARGET_NCOH: return 3; + default: + return -EINVAL; + } +} + +void +gf100_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + nvkm_fo64(inst, 0x0200, 0x00000000, 2); +} + +int +gf100_vmm_join_(struct nvkm_vmm *vmm, struct nvkm_memory *inst, u64 base) +{ + struct nvkm_mmu_pt *pd = vmm->pd->pt[0]; + + switch (nvkm_memory_target(pd->memory)) { + case NVKM_MEM_TARGET_VRAM: base |= 0ULL << 0; break; + case NVKM_MEM_TARGET_HOST: base |= 2ULL << 0; + base |= BIT_ULL(2) /* VOL. */; + break; + case NVKM_MEM_TARGET_NCOH: base |= 3ULL << 0; break; + default: + WARN_ON(1); + return -EINVAL; + } + base |= pd->addr; + + nvkm_kmap(inst); + nvkm_wo64(inst, 0x0200, base); + nvkm_wo64(inst, 0x0208, vmm->limit - 1); + nvkm_done(inst); + return 0; +} + +int +gf100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + return gf100_vmm_join_(vmm, inst, 0); +} + +static const struct nvkm_vmm_func +gf100_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gf100_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gf100_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gf100_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gf100_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gf100_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +gf100_vmm_new_(const struct nvkm_vmm_func *func_16, + const struct nvkm_vmm_func *func_17, + struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + switch (mmu->subdev.device->fb->page) { + case 16: return nv04_vmm_new_(func_16, mmu, 0, addr, size, + argv, argc, key, name, pvmm); + case 17: return nv04_vmm_new_(func_17, mmu, 0, addr, size, + argv, argc, key, name, pvmm); + default: + WARN_ON(1); + return -EINVAL; + } +} + +int +gf100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c new file mode 100644 index 000000000000..0ebb7bccfcd2 --- /dev/null +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c @@ -0,0 +1,102 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +void +gk104_vmm_lpt_invalid(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + PRIV tells the MMU to ignore corresponding SPTEs. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(1) /* PRIV. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gk104_vmm_lpt = { + .invalid = gk104_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .mem = gf100_vmm_pgt_mem, +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_17_17[] = { + { LPT, 10, 8, 0x1000, &gk104_vmm_lpt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_16_12[] = { + { SPT, 14, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_16_16[] = { + { LPT, 10, 8, 0x1000, &gk104_vmm_lpt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_func +gk104_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gk104_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +gk104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c new file mode 100644 index 000000000000..8086994a0446 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c @@ -0,0 +1,71 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <core/memory.h> + +int +gk20a_vmm_aper(enum nvkm_memory_target target) +{ + switch (target) { + case NVKM_MEM_TARGET_NCOH: return 0; + default: + return -EINVAL; + } +} + +static const struct nvkm_vmm_func +gk20a_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xxHC }, + { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xxHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gk20a_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xxHC }, + { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xxHx }, + {} + } +}; + +int +gk20a_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c new file mode 100644 index 000000000000..a1676a4644fe --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c @@ -0,0 +1,185 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <nvif/ifb00d.h> +#include <nvif/unpack.h> + +static void +gm200_vmm_pgt_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + VOL tells the MMU to treat the PTE as sparse. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(32) /* VOL. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gm200_vmm_spt = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgt_sparse, + .mem = gf100_vmm_pgt_mem, + .dma = gf100_vmm_pgt_dma, + .sgl = gf100_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc_func +gm200_vmm_lpt = { + .invalid = gk104_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgt_sparse, + .mem = gf100_vmm_pgt_mem, +}; + +static void +gm200_vmm_pgd_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + /* VALID_FALSE + VOL_BIG tells the MMU to treat the PDE as sparse. */ + VMM_FO064(pt, vmm, pdei * 8, BIT_ULL(35) /* VOL_BIG. */, pdes); +} + +static const struct nvkm_vmm_desc_func +gm200_vmm_pgd = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgd_sparse, + .pde = gf100_vmm_pgd_pde, +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gm200_vmm_spt }, + { PGD, 13, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_17_17[] = { + { LPT, 10, 8, 0x1000, &gm200_vmm_lpt }, + { PGD, 13, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_16_12[] = { + { SPT, 14, 8, 0x1000, &gm200_vmm_spt }, + { PGD, 14, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_16_16[] = { + { LPT, 10, 8, 0x1000, &gm200_vmm_lpt }, + { PGD, 14, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +int +gm200_vmm_join_(struct nvkm_vmm *vmm, struct nvkm_memory *inst, u64 base) +{ + if (vmm->func->page[1].shift == 16) + base |= BIT_ULL(11); + return gf100_vmm_join_(vmm, inst, base); +} + +int +gm200_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + return gm200_vmm_join_(vmm, inst, 0); +} + +static const struct nvkm_vmm_func +gm200_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, + { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gm200_vmm_desc_17_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gm200_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, + { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gm200_vmm_desc_16_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +int +gm200_vmm_new_(const struct nvkm_vmm_func *func_16, + const struct nvkm_vmm_func *func_17, + struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + const struct nvkm_vmm_func *func; + union { + struct gm200_vmm_vn vn; + struct gm200_vmm_v0 v0; + } *args = argv; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + switch 
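+		/* Userspace selects the big-page size at VMM creation:
+		 * 16 for 64KiB pages, 17 for 128KiB.  Unversioned args
+		 * fall back to the 128KiB layout below.
+		 */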
(args->v0.bigpage) { + case 16: func = func_16; break; + case 17: func = func_17; break; + default: + return -EINVAL; + } + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + func = func_17; + } else + return ret; + + return nvkm_vmm_new_(func, mmu, 0, addr, size, key, name, pvmm); +} + +int +gm200_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} + +int +gm200_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c new file mode 100644 index 000000000000..64d4b6cff8dd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c @@ -0,0 +1,70 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +static const struct nvkm_vmm_func +gm20b_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, + { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gm200_vmm_desc_17_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gm20b_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, + { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gm200_vmm_desc_16_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +int +gm20b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} + +int +gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c new file mode 100644 index 000000000000..059fafe0e771 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -0,0 +1,347 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/ltc.h> + +#include <nvif/ifc00d.h> +#include <nvif/unpack.h> + +static inline void +gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 data = (addr >> 4) | map->type; + + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO064(pt, vmm, ptei++ * 8, data); + data += map->next; + } +} + +static void +gp100_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = (*map->dma++ >> 4) | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + VOL tells the MMU to treat the PTE as sparse. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(3) /* VOL. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_spt = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .mem = gp100_vmm_pgt_mem, + .dma = gp100_vmm_pgt_dma, + .sgl = gp100_vmm_pgt_sgl, +}; + +static void +gp100_vmm_lpt_invalid(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + PRIV tells the MMU to ignore corresponding SPTEs. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(5) /* PRIV. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_lpt = { + .invalid = gp100_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .mem = gp100_vmm_pgt_mem, +}; + +static inline void +gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 data = (addr >> 4) | map->type; + + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); + data += map->next; + } +} + +static void +gp100_vmm_pd0_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gp100_vmm_pd0_pte); +} + +static inline bool +gp100_vmm_pde(struct nvkm_mmu_pt *pt, u64 *data) +{ + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: *data |= 1ULL << 1; break; + case NVKM_MEM_TARGET_HOST: *data |= 2ULL << 1; + *data |= BIT_ULL(3); /* VOL. 
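+					    * Presumably flags page tables
+					    * placed in coherent system
+					    * memory; the NCOH case below
+					    * does not set it.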
*/ + break; + case NVKM_MEM_TARGET_NCOH: *data |= 3ULL << 1; break; + default: + WARN_ON(1); + return false; + } + *data |= pt->addr >> 4; + return true; +} + +static void +gp100_vmm_pd0_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + u64 data[2] = {}; + + if (pgt->pt[0] && !gp100_vmm_pde(pgt->pt[0], &data[0])) + return; + if (pgt->pt[1] && !gp100_vmm_pde(pgt->pt[1], &data[1])) + return; + + nvkm_kmap(pd->memory); + VMM_WO128(pd, vmm, pdei * 0x10, data[0], data[1]); + nvkm_done(pd->memory); +} + +static void +gp100_vmm_pd0_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + /* VALID_FALSE + VOL_BIG tells the MMU to treat the PDE as sparse. */ + VMM_FO128(pt, vmm, pdei * 0x10, BIT_ULL(3) /* VOL_BIG. */, 0ULL, pdes); +} + +static void +gp100_vmm_pd0_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + VMM_FO128(pt, vmm, pdei * 0x10, 0ULL, 0ULL, pdes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_pd0 = { + .unmap = gp100_vmm_pd0_unmap, + .sparse = gp100_vmm_pd0_sparse, + .pde = gp100_vmm_pd0_pde, + .mem = gp100_vmm_pd0_mem, +}; + +static void +gp100_vmm_pd1_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + u64 data = 0; + + if (!gp100_vmm_pde(pgt->pt[0], &data)) + return; + + nvkm_kmap(pd->memory); + VMM_WO064(pd, vmm, pdei * 8, data); + nvkm_done(pd->memory); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_pd1 = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .pde = gp100_vmm_pd1_pde, +}; + +const struct nvkm_vmm_desc +gp100_vmm_desc_16[] = { + { LPT, 5, 8, 0x0100, &gp100_vmm_desc_lpt }, + { PGD, 8, 16, 0x1000, &gp100_vmm_desc_pd0 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 2, 8, 0x1000, &gp100_vmm_desc_pd1 }, + {} +}; + +const struct nvkm_vmm_desc +gp100_vmm_desc_12[] = { + { SPT, 9, 8, 0x1000, &gp100_vmm_desc_spt }, + { PGD, 8, 16, 0x1000, &gp100_vmm_desc_pd0 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 2, 8, 0x1000, &gp100_vmm_desc_pd1 }, + {} +}; + +int +gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + const enum nvkm_memory_target target = nvkm_memory_target(map->memory); + const struct nvkm_vmm_page *page = map->page; + union { + struct gp100_vmm_map_vn vn; + struct gp100_vmm_map_v0 v0; + } *args = argv; + struct nvkm_device *device = vmm->mmu->subdev.device; + struct nvkm_memory *memory = map->memory; + u8 kind, priv, ro, vol; + int kindn, aper, ret = -ENOSYS; + const u8 *kindm; + + map->next = (1ULL << page->shift) >> 4; + map->type = 0; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + vol = !!args->v0.vol; + ro = !!args->v0.ro; + priv = !!args->v0.priv; + kind = args->v0.kind; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + vol = target == NVKM_MEM_TARGET_HOST; + ro = 0; + priv = 0; + kind = 0x00; + } else { + VMM_DEBUG(vmm, "args"); + return ret; + } + + aper = vmm->func->aper(target); + if (WARN_ON(aper < 0)) + return aper; + + kindm = vmm->mmu->func->kind(vmm->mmu, &kindn); + if (kind >= kindn || kindm[kind] == 0xff) { + VMM_DEBUG(vmm, "kind %02x", kind); + return -EINVAL; + } + + if (kindm[kind] != kind) { + u64 tags = nvkm_memory_size(memory) >> 16; + if (aper != 0 || 
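+		    /* Compression is only honoured for VRAM (aperture 0)
+		     * mappings whose page size is flagged with
+		     * NVKM_VMM_PAGE_COMP.
+		     */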
!(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, + nvkm_ltc_tags_clear, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, "comp %d", ret); + return ret; + } + + if (map->tags->mn) { + tags = map->tags->mn->offset + (map->offset >> 16); + map->ctag |= ((1ULL << page->shift) >> 16) << 36; + map->type |= tags << 36; + map->next |= map->ctag; + } else { + kind = kindm[kind]; + } + } + + map->type |= BIT(0); + map->type |= (u64)aper << 1; + map->type |= (u64) vol << 3; + map->type |= (u64)priv << 5; + map->type |= (u64) ro << 6; + map->type |= (u64)kind << 56; + return 0; +} + +void +gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) +{ + gf100_vmm_flush_(vmm, 5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth); +} + +int +gp100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + const u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11); /* 64KiB */ + return gf100_vmm_join_(vmm, inst, base); +} + +static const struct nvkm_vmm_func +gp100_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gp100_vmm_valid, + .flush = gp100_vmm_flush, + .page = { + { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, + { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, + { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +int +gp100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&gp100_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c new file mode 100644 index 000000000000..3dcc6bddb32f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -0,0 +1,49 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +static const struct nvkm_vmm_func +gp10b_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gp100_vmm_valid, + .flush = gp100_vmm_flush, + .page = { + { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, + { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, + { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +int +gp10b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&gp10b_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c new file mode 100644 index 000000000000..0cab1ffc9f64 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c @@ -0,0 +1,140 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <nvif/if000d.h> +#include <nvif/unpack.h> + +static inline void +nv04_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u32 data = addr | 0x00000003; /* PRESENT, RW. 
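+				       * in the low bits of each nv04 PTE;
+				       * PTEs start 8 bytes into the table,
+				       * after the two header words
+				       * nv04_vmm_new() writes below.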
*/ + while (ptes--) { + VMM_WO032(pt, vmm, 8 + ptei++ * 4, data); + data += 0x00001000; + } +} + +static void +nv04_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv04_vmm_pgt_pte); +} + +static void +nv04_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + while (ptes--) + VMM_WO032(pt, vmm, 8 + (ptei++ * 4), *map->dma++ | 0x00000003); + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv04_vmm_pgt_pte); +#endif +} + +static void +nv04_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO032(pt, vmm, 8 + (ptei * 4), 0, ptes); +} + +static const struct nvkm_vmm_desc_func +nv04_vmm_desc_pgt = { + .unmap = nv04_vmm_pgt_unmap, + .dma = nv04_vmm_pgt_dma, + .sgl = nv04_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv04_vmm_desc_12[] = { + { PGT, 15, 4, 0x1000, &nv04_vmm_desc_pgt }, + {} +}; + +int +nv04_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + union { + struct nv04_vmm_map_vn vn; + } *args = argv; + int ret = -ENOSYS; + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + VMM_DEBUG(vmm, "args"); + return ret; +} + +static const struct nvkm_vmm_func +nv04_vmm = { + .valid = nv04_vmm_valid, + .page = { + { 12, &nv04_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, + u32 pd_header, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + union { + struct nv04_vmm_vn vn; + } *args = argv; + int ret; + + ret = nvkm_vmm_new_(func, mmu, pd_header, addr, size, key, name, pvmm); + if (ret) + return ret; + + return nvif_unvers(-ENOSYS, &argv, &argc, args->vn); +} + +int +nv04_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_memory *mem; + struct nvkm_vmm *vmm; + int ret; + + ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, addr, size, + argv, argc, key, name, &vmm); + *pvmm = vmm; + if (ret) + return ret; + + mem = vmm->pd->pt[0]->memory; + nvkm_kmap(mem); + nvkm_wo32(mem, 0x00000, 0x0002103d); /* PCI, RW, PT, !LN */ + nvkm_wo32(mem, 0x00004, vmm->limit - 1); + nvkm_done(mem); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c new file mode 100644 index 000000000000..b595f130e573 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c @@ -0,0 +1,113 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/timer.h> + +static void +nv41_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u32 data = (addr >> 7) | 0x00000001; /* VALID. */ + while (ptes--) { + VMM_WO032(pt, vmm, ptei++ * 4, data); + data += 0x00000020; + } +} + +static void +nv41_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv41_vmm_pgt_pte); +} + +static void +nv41_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + while (ptes--) { + const u32 data = (*map->dma++ >> 7) | 0x00000001; + VMM_WO032(pt, vmm, ptei++ * 4, data); + } + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv41_vmm_pgt_pte); +#endif +} + +static void +nv41_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO032(pt, vmm, ptei * 4, 0, ptes); +} + +static const struct nvkm_vmm_desc_func +nv41_vmm_desc_pgt = { + .unmap = nv41_vmm_pgt_unmap, + .dma = nv41_vmm_pgt_dma, + .sgl = nv41_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv41_vmm_desc_12[] = { + { PGT, 17, 4, 0x1000, &nv41_vmm_desc_pgt }, + {} +}; + +static void +nv41_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_subdev *subdev = &vmm->mmu->subdev; + struct nvkm_device *device = subdev->device; + + mutex_lock(&subdev->mutex); + nvkm_wr32(device, 0x100810, 0x00000022); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100810) & 0x00000020) + break; + ); + nvkm_wr32(device, 0x100810, 0x00000000); + mutex_unlock(&subdev->mutex); +} + +static const struct nvkm_vmm_func +nv41_vmm = { + .valid = nv04_vmm_valid, + .flush = nv41_vmm_flush, + .page = { + { 12, &nv41_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv41_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&nv41_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c new file mode 100644 index 000000000000..b834e4352334 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c @@ -0,0 +1,230 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/timer.h> + +static void +nv44_vmm_pgt_fill(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + dma_addr_t *list, u32 ptei, u32 ptes) +{ + u32 pteo = (ptei << 2) & ~0x0000000f; + u32 tmp[4]; + + tmp[0] = nvkm_ro32(pt->memory, pteo + 0x0); + tmp[1] = nvkm_ro32(pt->memory, pteo + 0x4); + tmp[2] = nvkm_ro32(pt->memory, pteo + 0x8); + tmp[3] = nvkm_ro32(pt->memory, pteo + 0xc); + + while (ptes--) { + u32 addr = (list ? *list++ : vmm->null) >> 12; + switch (ptei++ & 0x3) { + case 0: + tmp[0] &= ~0x07ffffff; + tmp[0] |= addr; + break; + case 1: + tmp[0] &= ~0xf8000000; + tmp[0] |= addr << 27; + tmp[1] &= ~0x003fffff; + tmp[1] |= addr >> 5; + break; + case 2: + tmp[1] &= ~0xffc00000; + tmp[1] |= addr << 22; + tmp[2] &= ~0x0001ffff; + tmp[2] |= addr >> 10; + break; + case 3: + tmp[2] &= ~0xfffe0000; + tmp[2] |= addr << 17; + tmp[3] &= ~0x00000fff; + tmp[3] |= addr >> 15; + break; + } + } + + VMM_WO032(pt, vmm, pteo + 0x0, tmp[0]); + VMM_WO032(pt, vmm, pteo + 0x4, tmp[1]); + VMM_WO032(pt, vmm, pteo + 0x8, tmp[2]); + VMM_WO032(pt, vmm, pteo + 0xc, tmp[3] | 0x40000000); +} + +static void +nv44_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + dma_addr_t tmp[4], i; + + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + for (i = 0; i < pten; i++, addr += 0x1000) + tmp[i] = addr; + nv44_vmm_pgt_fill(vmm, pt, tmp, ptei, pten); + ptei += pten; + ptes -= pten; + } + + while (ptes >= 4) { + for (i = 0; i < 4; i++, addr += 0x1000) + tmp[i] = addr >> 12; + VMM_WO032(pt, vmm, ptei++ * 4, tmp[0] >> 0 | tmp[1] << 27); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[1] >> 5 | tmp[2] << 22); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[2] >> 10 | tmp[3] << 17); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[3] >> 15 | 0x40000000); + ptes -= 4; + } + + if (ptes) { + for (i = 0; i < ptes; i++, addr += 0x1000) + tmp[i] = addr; + nv44_vmm_pgt_fill(vmm, pt, tmp, ptei, ptes); + } +} + +static void +nv44_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv44_vmm_pgt_pte); +} + +static void +nv44_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + nv44_vmm_pgt_fill(vmm, pt, map->dma, 
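+				   /* nv44 packs four PTEs into each
+				    * 16-byte group, with fields straddling
+				    * the 32-bit words, so unaligned head
+				    * and tail entries go through the
+				    * read-modify-write helper above. */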
ptei, pten); + ptei += pten; + ptes -= pten; + map->dma += pten; + } + + while (ptes >= 4) { + u32 tmp[4], i; + for (i = 0; i < 4; i++) + tmp[i] = *map->dma++ >> 12; + VMM_WO032(pt, vmm, ptei++ * 4, tmp[0] >> 0 | tmp[1] << 27); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[1] >> 5 | tmp[2] << 22); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[2] >> 10 | tmp[3] << 17); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[3] >> 15 | 0x40000000); + ptes -= 4; + } + + if (ptes) { + nv44_vmm_pgt_fill(vmm, pt, map->dma, ptei, ptes); + map->dma += ptes; + } + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv44_vmm_pgt_pte); +#endif +} + +static void +nv44_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + nvkm_kmap(pt->memory); + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + nv44_vmm_pgt_fill(vmm, pt, NULL, ptei, pten); + ptei += pten; + ptes -= pten; + } + + while (ptes > 4) { + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + ptes -= 4; + } + + if (ptes) + nv44_vmm_pgt_fill(vmm, pt, NULL, ptei, ptes); + nvkm_done(pt->memory); +} + +static const struct nvkm_vmm_desc_func +nv44_vmm_desc_pgt = { + .unmap = nv44_vmm_pgt_unmap, + .dma = nv44_vmm_pgt_dma, + .sgl = nv44_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv44_vmm_desc_12[] = { + { PGT, 17, 4, 0x80000, &nv44_vmm_desc_pgt }, + {} +}; + +static void +nv44_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100814, vmm->limit - 4096); + nvkm_wr32(device, 0x100808, 0x000000020); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100808) & 0x00000001) + break; + ); + nvkm_wr32(device, 0x100808, 0x00000000); +} + +static const struct nvkm_vmm_func +nv44_vmm = { + .valid = nv04_vmm_valid, + .flush = nv44_vmm_flush, + .page = { + { 12, &nv44_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv44_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_subdev *subdev = &mmu->subdev; + struct nvkm_vmm *vmm; + int ret; + + ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, addr, size, + argv, argc, key, name, &vmm); + *pvmm = vmm; + if (ret) + return ret; + + vmm->nullp = dma_alloc_coherent(subdev->device->dev, 16 * 1024, + &vmm->null, GFP_KERNEL); + if (!vmm->nullp) { + nvkm_warn(subdev, "unable to allocate dummy pages\n"); + vmm->null = 0; + } + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c new file mode 100644 index 000000000000..863a2edd9861 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -0,0 +1,385 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/timer.h> +#include <engine/gr.h> + +#include <nvif/if500d.h> +#include <nvif/unpack.h> + +static inline void +nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 next = addr | map->type, data; + u32 pten; + int log2blk; + + map->type += ptes * map->ctag; + + while (ptes) { + for (log2blk = 7; log2blk >= 0; log2blk--) { + pten = 1 << log2blk; + if (ptes >= pten && IS_ALIGNED(ptei, pten)) + break; + } + + data = next | (log2blk << 7); + next += pten * map->next; + ptes -= pten; + + while (pten--) + VMM_WO064(pt, vmm, ptei++ * 8, data); + } +} + +static void +nv50_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = *map->dma++ | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO064(pt, vmm, ptei * 8, 0ULL, ptes); +} + +static const struct nvkm_vmm_desc_func +nv50_vmm_pgt = { + .unmap = nv50_vmm_pgt_unmap, + .mem = nv50_vmm_pgt_mem, + .dma = nv50_vmm_pgt_dma, + .sgl = nv50_vmm_pgt_sgl, +}; + +static bool +nv50_vmm_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgt, u64 *pdata) +{ + struct nvkm_mmu_pt *pt; + u64 data = 0xdeadcafe00000000ULL; + if (pgt && (pt = pgt->pt[0])) { + switch (pgt->page) { + case 16: data = 0x00000001; break; + case 12: data = 0x00000003; + switch (nvkm_memory_size(pt->memory)) { + case 0x100000: data |= 0x00000000; break; + case 0x040000: data |= 0x00000020; break; + case 0x020000: data |= 0x00000040; break; + case 0x010000: data |= 0x00000060; break; + default: + WARN_ON(1); + return false; + } + break; + default: + WARN_ON(1); + return false; + } + + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 0x00000000; break; + case NVKM_MEM_TARGET_HOST: data |= 0x00000008; break; + case NVKM_MEM_TARGET_NCOH: data |= 0x0000000c; break; + default: + WARN_ON(1); + return false; + } + + data |= pt->addr; + } + *pdata = data; + return true; +} + +static void +nv50_vmm_pgd_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_join *join; + u32 pdeo = vmm->mmu->func->vmm.pd_offset + (pdei * 8); + u64 data; + + if (!nv50_vmm_pde(vmm, pgd->pde[pdei], 
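+			 /* nv50 has no single page directory per VMM; the
+			  * refreshed PDE is broadcast to every joined
+			  * channel's instance block just below. */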
&data)) + return; + + list_for_each_entry(join, &vmm->join, head) { + nvkm_kmap(join->inst); + nvkm_wo64(join->inst, pdeo, data); + nvkm_done(join->inst); + } +} + +static const struct nvkm_vmm_desc_func +nv50_vmm_pgd = { + .pde = nv50_vmm_pgd_pde, +}; + +static const struct nvkm_vmm_desc +nv50_vmm_desc_12[] = { + { PGT, 17, 8, 0x1000, &nv50_vmm_pgt }, + { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +nv50_vmm_desc_16[] = { + { PGT, 13, 8, 0x1000, &nv50_vmm_pgt }, + { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, + {} +}; + +static void +nv50_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_subdev *subdev = &vmm->mmu->subdev; + struct nvkm_device *device = subdev->device; + int i, id; + + mutex_lock(&subdev->mutex); + for (i = 0; i < NVKM_SUBDEV_NR; i++) { + if (!atomic_read(&vmm->engref[i])) + continue; + + /* unfortunate hw bug workaround... */ + if (i == NVKM_ENGINE_GR && device->gr) { + int ret = nvkm_gr_tlb_flush(device->gr); + if (ret != -ENODEV) + continue; + } + + switch (i) { + case NVKM_ENGINE_GR : id = 0x00; break; + case NVKM_ENGINE_VP : + case NVKM_ENGINE_MSPDEC: id = 0x01; break; + case NVKM_SUBDEV_BAR : id = 0x06; break; + case NVKM_ENGINE_MSPPP : + case NVKM_ENGINE_MPEG : id = 0x08; break; + case NVKM_ENGINE_BSP : + case NVKM_ENGINE_MSVLD : id = 0x09; break; + case NVKM_ENGINE_CIPHER: + case NVKM_ENGINE_SEC : id = 0x0a; break; + case NVKM_ENGINE_CE0 : id = 0x0d; break; + default: + continue; + } + + nvkm_wr32(device, 0x100c80, (id << 16) | 1); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) + break; + ) < 0) + nvkm_error(subdev, "%s mmu invalidate timeout\n", + nvkm_subdev_name[i]); + } + mutex_unlock(&subdev->mutex); +} + +static int +nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + const struct nvkm_vmm_page *page = map->page; + union { + struct nv50_vmm_map_vn vn; + struct nv50_vmm_map_v0 v0; + } *args = argv; + struct nvkm_device *device = vmm->mmu->subdev.device; + struct nvkm_ram *ram = device->fb->ram; + struct nvkm_memory *memory = map->memory; + u8 aper, kind, comp, priv, ro; + int kindn, ret = -ENOSYS; + const u8 *kindm; + + map->type = map->ctag = 0; + map->next = 1 << page->shift; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + ro = !!args->v0.ro; + priv = !!args->v0.priv; + kind = args->v0.kind & 0x7f; + comp = args->v0.comp & 0x03; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + ro = 0; + priv = 0; + kind = 0x00; + comp = 0; + } else { + VMM_DEBUG(vmm, "args"); + return ret; + } + + switch (nvkm_memory_target(memory)) { + case NVKM_MEM_TARGET_VRAM: + if (ram->stolen) { + map->type |= ram->stolen; + aper = 3; + } else { + aper = 0; + } + break; + case NVKM_MEM_TARGET_HOST: + aper = 2; + break; + case NVKM_MEM_TARGET_NCOH: + aper = 3; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + kindm = vmm->mmu->func->kind(vmm->mmu, &kindn); + if (kind >= kindn || kindm[kind] == 0x7f) { + VMM_DEBUG(vmm, "kind %02x", kind); + return -EINVAL; + } + + if (map->mem && map->mem->type != kindm[kind]) { + VMM_DEBUG(vmm, "kind %02x bankswz: %d %d", kind, + kindm[kind], map->mem->type); + return -EINVAL; + } + + if (comp) { + u32 tags = (nvkm_memory_size(memory) >> 16) * comp; + if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, NULL, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, 
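The per-engine invalidate in nv50_vmm_flush above is a doorbell-and-poll handshake on register 0x100c80: write (engine id << 16) | 1, then wait for the hardware to clear bit 0, giving up after roughly two seconds as the driver does. A freestanding sketch of the same handshake, assuming plain MMIO accessors and a millisecond-granular delay:

#include <linux/io.h>
#include <linux/delay.h>
#include <linux/errno.h>

static int nv50_flush_one(void __iomem *mmio, int id)
{
        int ms = 2000;

        writel((id << 16) | 0x00000001, mmio + 0x100c80);
        while (readl(mmio + 0x100c80) & 0x00000001) {
                if (--ms < 0)
                        return -ETIMEDOUT;
                mdelay(1);
        }
        return 0;
}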
"comp %d", ret); + return ret; + } + + if (map->tags->mn) { + u32 tags = map->tags->mn->offset + (map->offset >> 16); + map->ctag |= (u64)comp << 49; + map->type |= (u64)comp << 47; + map->type |= (u64)tags << 49; + map->next |= map->ctag; + } + } + + map->type |= BIT(0); /* Valid. */ + map->type |= (u64)ro << 3; + map->type |= (u64)aper << 4; + map->type |= (u64)priv << 6; + map->type |= (u64)kind << 40; + return 0; +} + +static void +nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + struct nvkm_vmm_join *join; + + list_for_each_entry(join, &vmm->join, head) { + if (join->inst == inst) { + list_del(&join->head); + kfree(join); + break; + } + } +} + +static int +nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + const u32 pd_offset = vmm->mmu->func->vmm.pd_offset; + struct nvkm_vmm_join *join; + int ret = 0; + u64 data; + u32 pdei; + + if (!(join = kmalloc(sizeof(*join), GFP_KERNEL))) + return -ENOMEM; + join->inst = inst; + list_add_tail(&join->head, &vmm->join); + + nvkm_kmap(join->inst); + for (pdei = vmm->start >> 29; pdei <= (vmm->limit - 1) >> 29; pdei++) { + if (!nv50_vmm_pde(vmm, vmm->pd->pde[pdei], &data)) { + ret = -EINVAL; + break; + } + nvkm_wo64(join->inst, pd_offset + (pdei * 8), data); + } + nvkm_done(join->inst); + return ret; +} + +static const struct nvkm_vmm_func +nv50_vmm = { + .join = nv50_vmm_join, + .part = nv50_vmm_part, + .valid = nv50_vmm_valid, + .flush = nv50_vmm_flush, + .page_block = 1 << 29, + .page = { + { 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +nv50_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&nv50_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index a4cb82495cee..b1b1f3626b96 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -87,7 +87,7 @@ nvkm_pci_fini(struct nvkm_subdev *subdev, bool suspend) if (pci->irq >= 0) { free_irq(pci->irq, pci); pci->irq = -1; - }; + } if (pci->agp.bridge) nvkm_agp_fini(pci); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index 73ca1203281d..5e91b3f90065 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -39,7 +39,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_subdev *subdev = &gsb->base.subdev; - struct nvkm_vma vma; + struct nvkm_vma *vma = NULL; u32 start_address; int ret; @@ -48,12 +48,16 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, return ret; /* Map the HS firmware so the HS bootloader can see it */ - ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma); + ret = nvkm_vmm_get(gsb->vmm, 12, blob->size, &vma); if (ret) { nvkm_falcon_put(falcon, subdev); return ret; } + ret = nvkm_memory_map(blob, 0, gsb->vmm, vma, NULL, 0); + if (ret) + goto end; + /* Reset and set the falcon up */ ret = nvkm_falcon_reset(falcon); if (ret) @@ -61,7 +65,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, nvkm_falcon_bind_context(falcon, gsb->inst); /* Load the HS bootloader into the falcon's IMEM/DMEM */ - ret = 
sb->acr->func->load(sb->acr, falcon, blob, vma.offset); + ret = sb->acr->func->load(sb->acr, falcon, blob, vma->addr); if (ret < 0) goto end; @@ -91,7 +95,7 @@ end: nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true); /* We don't need the ACR firmware anymore */ - nvkm_gpuobj_unmap(&vma); + nvkm_vmm_put(gsb->vmm, &vma); nvkm_falcon_put(falcon, subdev); return ret; @@ -102,37 +106,26 @@ gm200_secboot_oneinit(struct nvkm_secboot *sb) { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_device *device = sb->subdev.device; - struct nvkm_vm *vm; - const u64 vm_area_len = 600 * 1024; int ret; /* Allocate instance block and VM */ - ret = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &gsb->inst); + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0, true, + &gsb->inst); if (ret) return ret; - ret = nvkm_gpuobj_new(device, 0x8000, 0, true, NULL, &gsb->pgd); + ret = nvkm_vmm_new(device, 0, 600 * 1024, NULL, 0, NULL, "acr", + &gsb->vmm); if (ret) return ret; - ret = nvkm_vm_new(device, 0, vm_area_len, 0, NULL, &vm); - if (ret) - return ret; - - atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]); + atomic_inc(&gsb->vmm->engref[NVKM_SUBDEV_PMU]); + gsb->vmm->debug = gsb->base.subdev.debug; - ret = nvkm_vm_ref(vm, &gsb->vm, gsb->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(gsb->vmm, gsb->inst); if (ret) return ret; - nvkm_kmap(gsb->inst); - nvkm_wo32(gsb->inst, 0x200, lower_32_bits(gsb->pgd->addr)); - nvkm_wo32(gsb->inst, 0x204, upper_32_bits(gsb->pgd->addr)); - nvkm_wo32(gsb->inst, 0x208, lower_32_bits(vm_area_len - 1)); - nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1)); - nvkm_done(gsb->inst); - if (sb->acr->func->oneinit) { ret = sb->acr->func->oneinit(sb->acr, sb); if (ret) @@ -160,9 +153,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb) sb->acr->func->dtor(sb->acr); - nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd); - nvkm_gpuobj_del(&gsb->pgd); - nvkm_gpuobj_del(&gsb->inst); + nvkm_vmm_part(gsb->vmm, gsb->inst); + nvkm_vmm_unref(&gsb->vmm); + nvkm_memory_unref(&gsb->inst); return gsb; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h index c8ab3d76bdef..62c5e162099a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h @@ -29,9 +29,8 @@ struct gm200_secboot { struct nvkm_secboot base; /* Instance block & address space used for HS FW execution */ - struct nvkm_gpuobj *inst; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; + struct nvkm_memory *inst; + struct nvkm_vmm *vmm; }; #define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c index ee989210725e..6f10b098676c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c @@ -183,7 +183,7 @@ acr_ls_sec2_post_run(const struct nvkm_acr *acr, const struct nvkm_secboot *sb) break; ); if (reg & BIT(4)) { - nvkm_debug(subdev, "applying workaround for start bug..."); + nvkm_debug(subdev, "applying workaround for start bug...\n"); nvkm_falcon_start(sb->boot_falcon); nvkm_msec(subdev->device, 1, if ((reg = nvkm_rd32(subdev->device, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index 885e919a8720..d9091f029506 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h 
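The gm200 secboot changes above are a straight conversion from the old nvkm_gpuobj_map() interface to the new two-step VMM one: reserve a virtual address range first, then map the backing memory into it, and release the range with nvkm_vmm_put() when done. Condensed into a hypothetical helper (nouveau nvkm headers assumed, error handling trimmed to the essentials):

/* Reserve VA with 4 KiB (shift 12) pages and map "mem" there. */
static int map_into_vmm(struct nvkm_vmm *vmm, struct nvkm_memory *mem,
                        u64 size, struct nvkm_vma **pvma)
{
        int ret = nvkm_vmm_get(vmm, 12, size, pvma);
        if (ret)
                return ret;

        ret = nvkm_memory_map(mem, 0, vmm, *pvma, NULL, 0);
        if (ret)
                nvkm_vmm_put(vmm, pvma);
        return ret;
}

The oneinit path benefits the same way: the hand-written instance-block words at 0x200..0x20c are gone because nvkm_vmm_join() now writes the page directory pointer and limit on the driver's behalf.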
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -25,6 +25,7 @@ #include <subdev/secboot.h> #include <subdev/mmu.h> +struct nvkm_gpuobj; struct nvkm_secboot_func { int (*oneinit)(struct nvkm_secboot *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 2bafcc1d1818..7ba56b12badd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -12,3 +12,4 @@ nvkm-y += nvkm/subdev/therm/gt215.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o nvkm-y += nvkm/subdev/therm/gm200.o +nvkm-y += nvkm/subdev/therm/gp100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index 952a7cb0a59a..f27fc6d0d4c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -341,7 +341,8 @@ nvkm_therm_init(struct nvkm_subdev *subdev) { struct nvkm_therm *therm = nvkm_therm(subdev); - therm->func->init(therm); + if (therm->func->init) + therm->func->init(therm); if (therm->suspend >= 0) { /* restore the pwm value only when on manual or auto mode */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c new file mode 100644 index 000000000000..9f0dea3f61dc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c @@ -0,0 +1,56 @@ +/* + * Copyright 2017 Rhys Kidd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
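The nvkm_therm_init change above makes the .init hook optional, which is exactly what the new gp100 implementation below relies on: it provides only .temp_get and .program_alarms. The guard is the usual optional-ops pattern; a generic sketch:

struct therm_ops {
        void (*init)(void *therm);      /* may be NULL, e.g. gp100 */
        int  (*temp_get)(void *therm);
};

static void therm_init(const struct therm_ops *ops, void *therm)
{
        if (ops->init)
                ops->init(therm);
}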
+ * + * Authors: Rhys Kidd + */ +#include "priv.h" + +static int +gp100_temp_get(struct nvkm_therm *therm) +{ + struct nvkm_device *device = therm->subdev.device; + struct nvkm_subdev *subdev = &therm->subdev; + u32 tsensor = nvkm_rd32(device, 0x020460); + u32 inttemp = (tsensor & 0x0001fff8); + + /* device SHADOWed */ + if (tsensor & 0x40000000) + nvkm_trace(subdev, "reading temperature from SHADOWed sensor\n"); + + /* device valid */ + if (tsensor & 0x20000000) + return (inttemp >> 8); + else + return -ENODEV; +} + +static const struct nvkm_therm_func +gp100_therm = { + .temp_get = gp100_temp_get, + .program_alarms = nvkm_therm_program_alarms_polling, +}; + +int +gp100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gp100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 74fc9362ecf9..c0fb52c6d4ca 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -219,7 +219,7 @@ int qxl_garbage_collect(struct qxl_device *qdev) union qxl_release_info *info; while (qxl_ring_pop(qdev->release_ring, &id)) { - QXL_INFO(qdev, "popped %lld\n", id); + DRM_DEBUG_DRIVER("popped %lld\n", id); while (id) { release = qxl_release_from_id_locked(qdev, id); if (release == NULL) @@ -229,8 +229,8 @@ int qxl_garbage_collect(struct qxl_device *qdev) next_id = info->next; qxl_release_unmap(qdev, release, info); - QXL_INFO(qdev, "popped %lld, next %lld\n", id, - next_id); + DRM_DEBUG_DRIVER("popped %lld, next %lld\n", id, + next_id); switch (release->type) { case QXL_RELEASE_DRAWABLE: @@ -248,7 +248,7 @@ int qxl_garbage_collect(struct qxl_device *qdev) } } - QXL_INFO(qdev, "%s: %d\n", __func__, i); + DRM_DEBUG_DRIVER("%d\n", i); return i; } @@ -381,17 +381,19 @@ void qxl_io_create_primary(struct qxl_device *qdev, { struct qxl_surface_create *create; - QXL_INFO(qdev, "%s: qdev %p, ram_header %p\n", __func__, qdev, - qdev->ram_header); + DRM_DEBUG_DRIVER("qdev %p, ram_header %p\n", qdev, qdev->ram_header); create = &qdev->ram_header->create_surface; create->format = bo->surf.format; create->width = bo->surf.width; create->height = bo->surf.height; create->stride = bo->surf.stride; - create->mem = qxl_bo_physical_address(qdev, bo, offset); + if (bo->shadow) { + create->mem = qxl_bo_physical_address(qdev, bo->shadow, offset); + } else { + create->mem = qxl_bo_physical_address(qdev, bo, offset); + } - QXL_INFO(qdev, "%s: mem = %llx, from %p\n", __func__, create->mem, - bo->kptr); + DRM_DEBUG_DRIVER("mem = %llx, from %p\n", create->mem, bo->kptr); create->flags = QXL_SURF_FLAG_KEEP_DATA; create->type = QXL_SURF_TYPE_PRIMARY; @@ -401,7 +403,7 @@ void qxl_io_create_primary(struct qxl_device *qdev, void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) { - QXL_INFO(qdev, "qxl_memslot_add %d\n", id); + DRM_DEBUG_DRIVER("qxl_memslot_add %d\n", id); wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC); } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index afbf50d0c08f..4756b3c9bf2c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -305,7 +305,9 @@ static const struct drm_crtc_funcs qxl_crtc_funcs = { void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct qxl_framebuffer *qxl_fb = to_qxl_framebuffer(fb); + struct qxl_bo *bo = gem_to_qxl_bo(qxl_fb->obj); + WARN_ON(bo->shadow); drm_gem_object_unreference_unlocked(qxl_fb->obj); drm_framebuffer_cleanup(fb); kfree(qxl_fb); @@ -508,6 +510,7 @@ 
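gp100_temp_get above decodes a single TSENSOR word: bit 29 says the reading is valid, bit 30 flags a SHADOWed sensor, and the field masked by 0x0001fff8 shifted down by 8 yields whole degrees Celsius (the low masked bits are discarded by the shift). As a standalone decoder:

#include <stdint.h>
#include <errno.h>

static int gp100_decode_temp(uint32_t tsensor)
{
        uint32_t inttemp = tsensor & 0x0001fff8;

        if (!(tsensor & 0x20000000))
                return -ENODEV;         /* no valid reading */
        return inttemp >> 8;            /* degrees Celsius */
}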
static void qxl_primary_atomic_update(struct drm_plane *plane, .x2 = qfb->base.width, .y2 = qfb->base.height }; + bool same_shadow = false; if (old_state->fb) { qfb_old = to_qxl_framebuffer(old_state->fb); @@ -519,15 +522,23 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, if (bo == bo_old) return; + if (bo_old && bo_old->shadow && bo->shadow && + bo_old->shadow == bo->shadow) { + same_shadow = true; + } + if (bo_old && bo_old->is_primary) { - qxl_io_destroy_primary(qdev); + if (!same_shadow) + qxl_io_destroy_primary(qdev); bo_old->is_primary = false; } if (!bo->is_primary) { - qxl_io_create_primary(qdev, 0, bo); + if (!same_shadow) + qxl_io_create_primary(qdev, 0, bo); bo->is_primary = true; } + qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1); } @@ -679,8 +690,9 @@ static void qxl_cursor_atomic_disable(struct drm_plane *plane, static int qxl_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { + struct qxl_device *qdev = plane->dev->dev_private; struct drm_gem_object *obj; - struct qxl_bo *user_bo; + struct qxl_bo *user_bo, *old_bo = NULL; int ret; if (!new_state->fb) @@ -689,6 +701,32 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(new_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + user_bo->is_dumb && !user_bo->shadow) { + if (plane->state->fb) { + obj = to_qxl_framebuffer(plane->state->fb)->obj; + old_bo = gem_to_qxl_bo(obj); + } + if (old_bo && old_bo->shadow && + user_bo->gem_base.size == old_bo->gem_base.size && + plane->state->crtc == new_state->crtc && + plane->state->crtc_w == new_state->crtc_w && + plane->state->crtc_h == new_state->crtc_h && + plane->state->src_x == new_state->src_x && + plane->state->src_y == new_state->src_y && + plane->state->src_w == new_state->src_w && + plane->state->src_h == new_state->src_h && + plane->state->rotation == new_state->rotation && + plane->state->zpos == new_state->zpos) { + drm_gem_object_get(&old_bo->shadow->gem_base); + user_bo->shadow = old_bo->shadow; + } else { + qxl_bo_create(qdev, user_bo->gem_base.size, + true, true, QXL_GEM_DOMAIN_VRAM, NULL, + &user_bo->shadow); + } + } + ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL); if (ret) return ret; @@ -713,6 +751,11 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(old_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); qxl_bo_unpin(user_bo); + + if (user_bo->shadow && !user_bo->is_primary) { + drm_gem_object_put_unlocked(&user_bo->shadow->gem_base); + user_bo->shadow = NULL; + } } static const uint32_t qxl_cursor_plane_formats[] = { diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 3397a1907336..08752c0ffb35 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -62,33 +62,9 @@ #define QXL_DEBUGFS_MAX_COMPONENTS 32 -extern int qxl_log_level; extern int qxl_num_crtc; extern int qxl_max_ioctls; -enum { - QXL_INFO_LEVEL = 1, - QXL_DEBUG_LEVEL = 2, -}; - -#define QXL_INFO(qdev, fmt, ...) do { \ - if (qxl_log_level >= QXL_INFO_LEVEL) { \ - qxl_io_log(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) -#define QXL_DEBUG(qdev, fmt, ...) do { \ - if (qxl_log_level >= QXL_DEBUG_LEVEL) { \ - qxl_io_log(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) -#define QXL_INFO_ONCE(qdev, fmt, ...) 
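The prepare_fb logic above reuses the previous framebuffer's shadow BO only when the new state is pixel-for-pixel compatible with the old one; any geometry change forces a fresh VRAM shadow. The condition, condensed into a hypothetical flattened-parameter predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

struct plane_geom {
        void *crtc;
        uint32_t crtc_w, crtc_h;
        uint32_t src_x, src_y, src_w, src_h;
        unsigned int rotation, zpos;
};

static bool can_reuse_shadow(const struct plane_geom *o,
                             const struct plane_geom *n,
                             size_t old_size, size_t new_size)
{
        return old_size == new_size &&
               o->crtc == n->crtc &&
               o->crtc_w == n->crtc_w && o->crtc_h == n->crtc_h &&
               o->src_x == n->src_x && o->src_y == n->src_y &&
               o->src_w == n->src_w && o->src_h == n->src_h &&
               o->rotation == n->rotation && o->zpos == n->zpos;
}

Together with the same_shadow test in qxl_primary_atomic_update, this is what lets a dumb-buffer page flip avoid a destroy/create cycle of the primary surface.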
do { \ - static int done; \ - if (!done) { \ - done = 1; \ - QXL_INFO(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) - #define DRM_FILE_OFFSET 0x100000000ULL #define DRM_FILE_PAGE_OFFSET (DRM_FILE_OFFSET >> PAGE_SHIFT) @@ -113,6 +89,8 @@ struct qxl_bo { /* Constant after initialization */ struct drm_gem_object gem_base; bool is_primary; /* is this now a primary surface */ + bool is_dumb; + struct qxl_bo *shadow; bool hw_surf_alloc; struct qxl_surface surf; uint32_t surface_id; @@ -351,7 +329,7 @@ int qxl_check_idle(struct qxl_ring *ring); static inline void * qxl_fb_virtual_address(struct qxl_device *qdev, unsigned long physical) { - QXL_INFO(qdev, "not implemented (%lu)\n", physical); + DRM_DEBUG_DRIVER("not implemented (%lu)\n", physical); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c index 5e65d5d2d937..11085ab01374 100644 --- a/drivers/gpu/drm/qxl/qxl_dumb.c +++ b/drivers/gpu/drm/qxl/qxl_dumb.c @@ -63,6 +63,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv, &handle); if (r) return r; + qobj->is_dumb = true; args->pitch = pitch; args->handle = handle; return 0; diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 844c4a31ca13..23af3e352673 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -240,18 +240,15 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev, return ret; qbo = gem_to_qxl_bo(gobj); - QXL_INFO(qdev, "%s: %dx%d %d\n", __func__, mode_cmd.width, - mode_cmd.height, mode_cmd.pitches[0]); + DRM_DEBUG_DRIVER("%dx%d %d\n", mode_cmd.width, + mode_cmd.height, mode_cmd.pitches[0]); shadow = vmalloc(mode_cmd.pitches[0] * mode_cmd.height); /* TODO: what's the usual response to memory allocation errors? */ BUG_ON(!shadow); - QXL_INFO(qdev, - "surface0 at gpu offset %lld, mmap_offset %lld (virt %p, shadow %p)\n", - qxl_bo_gpu_offset(qbo), - qxl_bo_mmap_offset(qbo), - qbo->kptr, - shadow); + DRM_DEBUG_DRIVER("surface0 at gpu offset %lld, mmap_offset %lld (virt %p, shadow %p)\n", + qxl_bo_gpu_offset(qbo), qxl_bo_mmap_offset(qbo), + qbo->kptr, shadow); size = mode_cmd.pitches[0] * mode_cmd.height; info = drm_fb_helper_alloc_fbi(&qfbdev->helper); diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index e6ec845b5be0..a6da6fa6ad58 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -154,7 +154,7 @@ qxl_release_alloc(struct qxl_device *qdev, int type, return handle; } *ret = release; - QXL_INFO(qdev, "allocated release %d\n", handle); + DRM_DEBUG_DRIVER("allocated release %d\n", handle); release->id = handle; return handle; } @@ -179,8 +179,7 @@ void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release) { - QXL_INFO(qdev, "release %d, type %d\n", release->id, - release->type); + DRM_DEBUG_DRIVER("release %d, type %d\n", release->id, release->type); if (release->surface_release_id) qxl_surface_id_dealloc(qdev, release->surface_release_id); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 7ecf8a4b9fe6..ab4823875311 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -136,8 +136,8 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma) "filp->private_data->minor->dev->dev_private == NULL\n"); return -EINVAL; } - QXL_INFO(qdev, "%s: filp->private_data = 0x%p, vma->vm_pgoff = %lx\n", - __func__, filp->private_data, vma->vm_pgoff); + DRM_DEBUG_DRIVER("filp->private_data = 0x%p, vma->vm_pgoff = %lx\n", + filp->private_data, vma->vm_pgoff); r = 
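Every QXL_INFO/QXL_DEBUG site in this patch converts the same way: the driver-private qxl_log_level gate (and its detour through qxl_io_log) is dropped in favour of DRM's standard dynamic debug, where DRM_DEBUG_DRIVER output is controlled by the drm.debug module parameter (driver messages are bit 0x02). The shape of the conversion:

/* before: printed via qxl_io_log() when qxl_log_level was raised */
QXL_INFO(qdev, "allocated release %d\n", handle);

/* after: gated by drm.debug, no qdev argument needed */
DRM_DEBUG_DRIVER("allocated release %d\n", handle);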
ttm_bo_mmap(filp, vma, &qdev->mman.bdev); if (unlikely(r != 0)) diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index be16c6390216..cf3e5985e3e7 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -102,8 +102,7 @@ radeon-y += \ radeon-y += \ radeon_vce.o \ vce_v1_0.o \ - vce_v2_0.o \ - radeon_kfd.o + vce_v2_0.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index a904c80c30e6..3e798593e042 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { /***** radeon AUX functions *****/ -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 3cb6c55b268d..898f9a078830 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -33,7 +33,6 @@ #include "cik_blit_shaders.h" #include "radeon_ucode.h" #include "clearstate_ci.h" -#include "radeon_kfd.h" #define SH_MEM_CONFIG_GFX_DEFAULT \ ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) @@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) /* * number of VMs * VMID 0 is reserved for System - * radeon graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 + * radeon graphics/compute will use VMIDs 1-15 */ - rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; + rdev->vm_manager.nvm = 16; /* base offset of vram pages */ if (rdev->flags & RADEON_IS_IGP) { u64 tmp = RREG32(MC_VM_FB_OFFSET); @@ -7589,9 +7587,6 @@ restart_ih: /* wptr/rptr are in bytes! 
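The radeon_atom_copy_swap() rewrite above simplifies the big-endian path by leaning on the contract spelled out in its new comment: callers must provide ALIGN(num_bytes, 4) bytes of room, so the copy can always round up to whole dwords and swap through fixed 20-byte bounce buffers. A host-side model of the to_le direction (__builtin_bswap32 standing in for cpu_to_le32 on a big-endian machine):

#include <stdint.h>
#include <string.h>

static void copy_swap_to_le(uint8_t *dst, const uint8_t *src,
                            uint8_t num_bytes)
{
        uint32_t src_tmp[5], dst_tmp[5];        /* up to 20 bytes, as in the driver */
        uint8_t align_num_bytes = (num_bytes + 3) & ~3;
        unsigned int i;

        memcpy(src_tmp, src, num_bytes);        /* pad bytes stay unspecified */
        for (i = 0; i < align_num_bytes / 4u; i++)
                dst_tmp[i] = __builtin_bswap32(src_tmp[i]);
        memcpy(dst, dst_tmp, align_num_bytes);
}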
*/ ring_index = rptr / 4; - radeon_kfd_interrupt(rdev, - (const void *) &rdev->ih.ring[ring_index]); - src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; @@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_kfd_resume(rdev); - if (r) - return r; - return 0; } @@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { - radeon_kfd_suspend(rdev); radeon_pm_suspend(rdev); radeon_audio_fini(rdev); radeon_vm_manager_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index e21015475ed5..cda16fcd43bb 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -30,8 +30,6 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 -#define RADEON_NUM_OF_VMIDS 8 - /* DIDT IND registers */ #define DIDT_SQ_CTRL0 0x0 # define DIDT_CTRL_EN (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cbaeec090c9..a8e546569858 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2456,9 +2456,6 @@ struct radeon_device { u64 vram_pin_size; u64 gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - struct mutex mn_lock; DECLARE_HASHTABLE(mn_hash, 7); }; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f4becad0a78c..31dd04f6baa1 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -43,7 +43,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_crtc_helper.h> -#include "radeon_kfd.h" /* * KMS wrapper. @@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev, { int ret; - /* - * Initialize amdkfd before starting radeon. If it was not loaded yet, - * defer radeon probing - */ - ret = radeon_kfd_init(); - if (ret == -EPROBE_DEFER) - return ret; - if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; @@ -645,7 +636,6 @@ static int __init radeon_init(void) static void __exit radeon_exit(void) { - radeon_kfd_fini(); pci_unregister_driver(pdriver); radeon_unregister_atpx_handler(); } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c deleted file mode 100644 index 385b4d76956d..000000000000 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ /dev/null @@ -1,901 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <drm/drmP.h> -#include "radeon.h" -#include "cikd.h" -#include "cik_reg.h" -#include "radeon_kfd.h" -#include "radeon_ucode.h" -#include <linux/firmware.h> -#include "cik_structs.h" - -#define CIK_PIPE_PER_MEC (4) - -static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { - TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, - TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, - TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, - TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL -}; - -struct kgd_mem { - struct radeon_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; -}; - - -static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr); - -static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); - -static uint64_t get_vmem_size(struct kgd_dev *kgd); -static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); - -static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); - -static int alloc_pasid(unsigned int bits); -static void free_pasid(unsigned int pasid); - -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); - -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); - -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int timeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); - -static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - 
.get_vmem_size = get_vmem_size, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = alloc_pasid, - .free_pasid = free_pasid, - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version -}; - -static const struct kgd2kfd_calls *kgd2kfd; - -int radeon_kfd_init(void) -{ - int ret; - -#if defined(CONFIG_HSA_AMD_MODULE) - int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); - - kgd2kfd_init_p = symbol_request(kgd2kfd_init); - - if (kgd2kfd_init_p == NULL) - return -ENOENT; - - ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) { - symbol_put(kgd2kfd_init); - kgd2kfd = NULL; - } - -#elif defined(CONFIG_HSA_AMD) - ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) - kgd2kfd = NULL; - -#else - ret = -ENOENT; -#endif - - return ret; -} - -void radeon_kfd_fini(void) -{ - if (kgd2kfd) { - kgd2kfd->exit(); - symbol_put(kgd2kfd_init); - } -} - -void radeon_kfd_device_probe(struct radeon_device *rdev) -{ - if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, &kfd2kgd); -} - -void radeon_kfd_device_init(struct radeon_device *rdev) -{ - int i, queue, pipe, mec; - - if (rdev->kfd) { - struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, - .num_pipe_per_mec = 4, - .num_queue_per_pipe = 8 - }; - - bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES); - - for (i = 0; i < KGD_MAX_QUEUES; ++i) { - queue = i % gpu_resources.num_queue_per_pipe; - pipe = (i / gpu_resources.num_queue_per_pipe) - % gpu_resources.num_pipe_per_mec; - mec = (i / gpu_resources.num_queue_per_pipe) - / gpu_resources.num_pipe_per_mec; - - if (mec == 0 && pipe > 0) - set_bit(i, gpu_resources.queue_bitmap); - } - - radeon_doorbell_get_kfd_info(rdev, - &gpu_resources.doorbell_physical_address, - &gpu_resources.doorbell_aperture_size, - &gpu_resources.doorbell_start_offset); - - kgd2kfd->device_init(rdev->kfd, &gpu_resources); - } -} - -void radeon_kfd_device_fini(struct radeon_device *rdev) -{ - if (rdev->kfd) { - kgd2kfd->device_exit(rdev->kfd); - rdev->kfd = NULL; - } -} - -void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) -{ - if (rdev->kfd) - kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); -} - -void radeon_kfd_suspend(struct radeon_device *rdev) -{ - if (rdev->kfd) - kgd2kfd->suspend(rdev->kfd); -} - -int radeon_kfd_resume(struct radeon_device *rdev) -{ - int r = 0; - - if (rdev->kfd) - r = kgd2kfd->resume(rdev->kfd); - - return r; -} - -static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - 
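The deleted radeon_kfd_device_init above carved the flat KGD queue index into (mec, pipe, queue) coordinates, then handed amdkfd every queue on MEC0 whose pipe was not pipe 0. The decomposition, restated as a standalone sketch with the same constants:

/* 8 queues per pipe, 4 pipes per MEC, as in the deleted code. */
static void decompose_queue_index(int i, int *mec, int *pipe, int *queue)
{
        const int num_queue_per_pipe = 8;
        const int num_pipe_per_mec = 4;

        *queue = i % num_queue_per_pipe;
        *pipe = (i / num_queue_per_pipe) % num_pipe_per_mec;
        *mec = (i / num_queue_per_pipe) / num_pipe_per_mec;
}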
struct kgd_mem **mem = (struct kgd_mem **) mem_obj; - int r; - - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; - - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, - RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); - if (r) { - dev_err(rdev->dev, - "failed to allocate BO for amdkfd (%d)\n", r); - return r; - } - - /* map the buffer */ - r = radeon_bo_reserve((*mem)->bo, true); - if (r) { - dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); - goto allocate_mem_reserve_bo_failed; - } - - r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); - if (r) { - dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); - goto allocate_mem_pin_bo_failed; - } - *gpu_addr = (*mem)->gpu_addr; - - r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); - if (r) { - dev_err(rdev->dev, - "(%d) failed to map bo to kernel for amdkfd\n", r); - goto allocate_mem_kmap_bo_failed; - } - *cpu_ptr = (*mem)->cpu_ptr; - - radeon_bo_unreserve((*mem)->bo); - - return 0; - -allocate_mem_kmap_bo_failed: - radeon_bo_unpin((*mem)->bo); -allocate_mem_pin_bo_failed: - radeon_bo_unreserve((*mem)->bo); -allocate_mem_reserve_bo_failed: - radeon_bo_unref(&(*mem)->bo); - - return r; -} - -static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) -{ - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; - - BUG_ON(mem == NULL); - - radeon_bo_reserve(mem->bo, true); - radeon_bo_kunmap(mem->bo); - radeon_bo_unpin(mem->bo); - radeon_bo_unreserve(mem->bo); - radeon_bo_unref(&(mem->bo)); - kfree(mem); -} - -static uint64_t get_vmem_size(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - BUG_ON(kgd == NULL); - - return rdev->mc.real_vram_size; -} - -static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - return rdev->asic->get_gpu_clock_counter(rdev); -} - -static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - /* The sclk is in quantas of 10kHz */ - return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; -} - -/* - * PASID manager - */ -static DEFINE_IDA(pasid_ida); - -static int alloc_pasid(unsigned int bits) -{ - int pasid = -EINVAL; - - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } - - return pasid; -} - -static void free_pasid(unsigned int pasid) -{ - ida_simple_remove(&pasid_ida, pasid); -} - -static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) -{ - return (struct radeon_device *)kgd; -} - -static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - writel(value, (void __iomem *)(rdev->rmmio + offset)); -} - -static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - return readl((void __iomem *)(rdev->rmmio + offset)); -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, - uint32_t queue, uint32_t vmid) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); - - mutex_lock(&rdev->srbm_mutex); - write_register(kgd, SRBM_GFX_CNTL, value); -} - -static void 
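alloc_pasid() above is a compact shrinking-range ID search: it first tries the widest requested power-of-two window, [2^(bits-1), 2^bits), and halves the window until the allocator finds a free ID or fails with something other than "range full". The control flow, with a hypothetical stand-in for ida_simple_get():

#include <errno.h>

static int alloc_pasid_sketch(unsigned int bits,
                              int (*get_id)(unsigned int lo, unsigned int hi))
{
        int pasid = -EINVAL;

        if (bits > 31)
                bits = 31;
        for (; bits > 0; bits--) {
                pasid = get_id(1u << (bits - 1), 1u << bits);
                if (pasid != -ENOSPC)
                        break;  /* found one, or a hard error */
        }
        return pasid;
}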
unlock_srbm(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - write_register(kgd, SRBM_GFX_CNTL, 0); - mutex_unlock(&rdev->srbm_mutex); -} - -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t queue_id) -{ - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, queue_id, 0); -} - -static void release_queue(struct kgd_dev *kgd) -{ - unlock_srbm(kgd); -} - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) -{ - lock_srbm(kgd, 0, 0, 0, vmid); - - write_register(kgd, SH_MEM_CONFIG, sh_mem_config); - write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); - write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); - write_register(kgd, SH_MEM_BASES, sh_mem_bases); - - unlock_srbm(kgd); -} - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid) -{ - /* - * We have to assume that there is no outstanding mapping. - * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 - * because a mapping is in progress or because a mapping finished and - * the SW cleared it. - * So the protocol is to always wait & clear. - */ - uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | - ATC_VMID_PASID_MAPPING_VALID_MASK; - - write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), - pasid_mapping); - - while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & - (1U << vmid))) - cpu_relax(); - write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); - - /* Mapping vmid to pasid also for IH block */ - write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), - pasid_mapping); - - return 0; -} - -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* nothing to do here */ - return 0; -} - -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) -{ - uint32_t mec; - uint32_t pipe; - - mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; - pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, 0, 0); - - write_register(kgd, CPC_INT_CNTL, - TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); - - unlock_srbm(kgd); - - return 0; -} - -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) -{ - uint32_t retval; - - retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + - m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - - pr_debug("kfd: sdma base address: 0x%x\n", retval); - - return retval; -} - -static inline struct cik_mqd *get_mqd(void *mqd) -{ - return (struct cik_mqd *)mqd; -} - -static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) -{ - return (struct cik_sdma_rlc_registers *)mqd; -} - -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) -{ - uint32_t wptr_shadow, is_wptr_shadow_valid; - struct cik_mqd *m; - - m = get_mqd(mqd); - - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - - acquire_queue(kgd, pipe_id, queue_id); - write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); - - write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - write_register(kgd, CP_HQD_PQ_BASE_HI, 
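kgd_set_pasid_vmid_mapping() above documents its own protocol: the UPDATE_STATUS bit may be clear either because no update has landed yet or because software already consumed a previous one, so the only safe sequence is write the mapping, wait for the per-VMID status bit to latch, then write-one-to-clear it (the same value is mirrored into the IH block's LUT afterwards). A sketch with hypothetical register accessors; the VALID bit position is assumed for illustration:

#include <stdint.h>

#define PASID_MAPPING_VALID     (1u << 31)      /* assumed position */

static void set_pasid_mapping(unsigned int pasid, unsigned int vmid,
                              uint32_t mapping_reg, uint32_t status_reg,
                              void (*wr)(uint32_t reg, uint32_t val),
                              uint32_t (*rd)(uint32_t reg))
{
        uint32_t mapping = pasid ? (pasid | PASID_MAPPING_VALID) : 0;

        wr(mapping_reg + vmid * 4, mapping);    /* 1. publish the mapping */
        /* 2. wait for the latch; unbounded, as in the driver */
        while (!(rd(status_reg) & (1u << vmid)))
                ;
        wr(status_reg, 1u << vmid);             /* 3. write-1-to-clear */
}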
m->cp_hqd_pq_base_hi); - write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - - write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); - write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); - - write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); - - write_register(kgd, CP_HQD_PERSISTENT_STATE, - m->cp_hqd_persistent_state); - write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); - write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); - - write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, - m->cp_hqd_atomic0_preop_lo); - - write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, - m->cp_hqd_atomic0_preop_hi); - - write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, - m->cp_hqd_atomic1_preop_lo); - - write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, - m->cp_hqd_atomic1_preop_hi); - - write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, - m->cp_hqd_pq_rptr_report_addr_lo); - - write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); - - write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); - - write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, - m->cp_hqd_pq_wptr_poll_addr_lo); - - write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, - m->cp_hqd_pq_wptr_poll_addr_hi); - - write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, - m->cp_hqd_pq_doorbell_control); - - write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); - - write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); - - write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); - - write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); - - if (is_wptr_shadow_valid) - write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); - - write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); - release_queue(kgd); - - return 0; -} - -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, - m->sdma_rlc_rb_base_hi); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, - m->sdma_rlc_rb_rptr_addr_lo); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, - m->sdma_rlc_rb_rptr_addr_hi); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_CNTL, - m->sdma_rlc_rb_cntl); - - return 0; -} - -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) -{ - uint32_t act; - bool retval = false; - uint32_t low, high; - - acquire_queue(kgd, pipe_id, queue_id); - act = read_register(kgd, CP_HQD_ACTIVE); - if (act) { - low = lower_32_bits(queue_address >> 8); - high = upper_32_bits(queue_address >> 8); - - if (low == read_register(kgd, CP_HQD_PQ_BASE) && - high == read_register(kgd, CP_HQD_PQ_BASE_HI)) - retval = true; - } - release_queue(kgd); - return retval; -} - -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - uint32_t sdma_rlc_rb_cntl; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - sdma_rlc_rb_cntl = 
read_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_CNTL); - - if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) - return true; - - return false; -} - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id) -{ - uint32_t temp; - - acquire_queue(kgd, pipe_id, queue_id); - write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); - - write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); - - while (true) { - temp = read_register(kgd, CP_HQD_ACTIVE); - if (temp & 0x1) - break; - if (timeout == 0) { - pr_err("kfd: cp queue preemption time out (%dms)\n", - temp); - release_queue(kgd); - return -ETIME; - } - msleep(20); - timeout -= 20; - } - - release_queue(kgd); - return 0; -} - -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int timeout) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - uint32_t temp; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); - temp = temp & ~SDMA_RB_ENABLE; - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); - - while (true) { - temp = read_register(kgd, sdma_base_addr + - SDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA_RLC_IDLE) - break; - if (timeout == 0) - return -ETIME; - msleep(20); - timeout -= 20; - } - - write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); - - return 0; -} - -static int kgd_address_watch_disable(struct kgd_dev *kgd) -{ - union TCP_WATCH_CNTL_BITS cntl; - unsigned int i; - - cntl.u32All = 0; - - cntl.bitfields.valid = 0; - cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - cntl.bitfields.atc = 1; - - /* Turning off this address until we set all the registers */ - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - write_register(kgd, - watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - return 0; -} - -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - union TCP_WATCH_CNTL_BITS cntl; - - cntl.u32All = cntl_val; - - /* Turning off this watch point until we set all the registers */ - cntl.bitfields.valid = 0; - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], - addr_hi); - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], - addr_lo); - - /* Enable the watch point */ - cntl.bitfields.valid = 1; - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - return 0; -} - -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - uint32_t data; - - mutex_lock(&rdev->grbm_idx_mutex); - - write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); - write_register(kgd, SQ_CMD, sq_cmd); - - /* Restore the GRBM_GFX_INDEX register */ - - data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | - SE_BROADCAST_WRITES; - - write_register(kgd, GRBM_GFX_INDEX, data); - - mutex_unlock(&rdev->grbm_idx_mutex); - - return 0; -} - -static 
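kgd_hqd_sdma_destroy() above follows the canonical ring-teardown order: clear the enable bit, poll the context status until the engine reports idle in 20 ms steps against the caller's timeout budget, and only then zero the doorbell and ring registers. The skeleton, with the hardware pokes abstracted into hypothetical callbacks:

#include <errno.h>
#include <stdbool.h>

static int sdma_destroy_sketch(unsigned int timeout_ms,
                               void (*stop_ring)(void),       /* clear RB_ENABLE */
                               bool (*ring_idle)(void),       /* SDMA_RLC_IDLE set? */
                               void (*clear_ring_regs)(void), /* doorbell/rptr/wptr/base */
                               void (*sleep_ms)(unsigned int))
{
        stop_ring();
        while (!ring_idle()) {
                if (timeout_ms < 20)
                        return -ETIME;
                sleep_ms(20);
                timeout_ms -= 20;
        }
        clear_ring_regs();
        return 0;
}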
uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset] - / 4; -} - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) -{ - uint32_t reg; - struct radeon_device *rdev = (struct radeon_device *) kgd; - - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); - return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct radeon_device *rdev = (struct radeon_device *) kgd; - - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); - return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct radeon_device *rdev = (struct radeon_device *) kgd; - - return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); -} - -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct radeon_device *rdev = (struct radeon_device *) kgd; - const union radeon_firmware_header *hdr; - - BUG_ON(kgd == NULL || rdev->mec_fw == NULL); - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union radeon_firmware_header *) rdev->me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union radeon_firmware_header *) - rdev->mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - case KGD_ENGINE_SDMA2: - hdr = (const union radeon_firmware_header *) - rdev->sdma_fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index dfee8f7d94ae..cde037f213d7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -34,8 +34,6 @@ #include <linux/slab.h> #include <linux/pm_runtime.h> -#include "radeon_kfd.h" - #if defined(CONFIG_VGA_SWITCHEROO) bool radeon_has_atpx(void); #else @@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) pm_runtime_forbid(dev->dev); } - radeon_kfd_device_fini(rdev); - radeon_acpi_fini(rdev); radeon_modeset_fini(rdev); @@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - radeon_kfd_device_probe(rdev); - radeon_kfd_device_init(rdev); - if (radeon_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 3c70c6224bd2..0ccc76217ee4 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -60,7 +60,7 @@ config ROCKCHIP_INNO_HDMI config ROCKCHIP_LVDS bool "Rockchip LVDS support" depends on DRM_ROCKCHIP - depends on PINCTRL + depends on PINCTRL && OF help Choose this option to enable support for Rockchip LVDS controllers. 
Rockchip rk3288 SoC has LVDS TX Controller can be used, and it diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 4d3f6ad0abdd..93b7102dd008 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -72,7 +72,7 @@ struct rockchip_dp_device { struct reset_control *rst; struct work_struct psr_work; - spinlock_t psr_lock; + struct mutex psr_lock; unsigned int psr_state; const struct rockchip_dp_chip_data *data; @@ -83,21 +83,20 @@ struct rockchip_dp_device { static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) { struct rockchip_dp_device *dp = to_dp(encoder); - unsigned long flags; if (!analogix_dp_psr_supported(dp->dev)) return; DRM_DEV_DEBUG(dp->dev, "%s PSR...\n", enabled ? "Entry" : "Exit"); - spin_lock_irqsave(&dp->psr_lock, flags); + mutex_lock(&dp->psr_lock); if (enabled) dp->psr_state = EDP_VSC_PSR_STATE_ACTIVE; else dp->psr_state = ~EDP_VSC_PSR_STATE_ACTIVE; schedule_work(&dp->psr_work); - spin_unlock_irqrestore(&dp->psr_lock, flags); + mutex_unlock(&dp->psr_lock); } static void analogix_dp_psr_work(struct work_struct *work) @@ -105,7 +104,6 @@ static void analogix_dp_psr_work(struct work_struct *work) struct rockchip_dp_device *dp = container_of(work, typeof(*dp), psr_work); int ret; - unsigned long flags; ret = rockchip_drm_wait_vact_end(dp->encoder.crtc, PSR_WAIT_LINE_FLAG_TIMEOUT_MS); @@ -114,12 +112,12 @@ static void analogix_dp_psr_work(struct work_struct *work) return; } - spin_lock_irqsave(&dp->psr_lock, flags); + mutex_lock(&dp->psr_lock); if (dp->psr_state == EDP_VSC_PSR_STATE_ACTIVE) analogix_dp_enable_psr(dp->dev); else analogix_dp_disable_psr(dp->dev); - spin_unlock_irqrestore(&dp->psr_lock, flags); + mutex_unlock(&dp->psr_lock); } static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) @@ -381,7 +379,7 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, dp->plat_data.power_off = rockchip_dp_powerdown; dp->plat_data.get_modes = rockchip_dp_get_modes; - spin_lock_init(&dp->psr_lock); + mutex_init(&dp->psr_lock); dp->psr_state = ~EDP_VSC_PSR_STATE_ACTIVE; INIT_WORK(&dp->psr_work, analogix_dp_psr_work); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d79607a1187c..c088703777e2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -150,8 +150,7 @@ static void ttm_bo_release_list(struct kref *list_kref) ttm_tt_destroy(bo->ttm); atomic_dec(&bo->glob->bo_count); dma_fence_put(bo->moving); - if (bo->resv == &bo->ttm_resv) - reservation_object_fini(&bo->ttm_resv); + reservation_object_fini(&bo->ttm_resv); mutex_destroy(&bo->wu_mutex); if (bo->destroy) bo->destroy(bo); @@ -402,14 +401,11 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) if (bo->resv == &bo->ttm_resv) return 0; - reservation_object_init(&bo->ttm_resv); BUG_ON(!reservation_object_trylock(&bo->ttm_resv)); r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv); - if (r) { + if (r) reservation_object_unlock(&bo->ttm_resv); - reservation_object_fini(&bo->ttm_resv); - } return r; } @@ -459,6 +455,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) spin_unlock(&glob->lru_lock); if (bo->resv != &bo->ttm_resv) reservation_object_unlock(&bo->ttm_resv); + ttm_bo_cleanup_memtype_use(bo); return; } @@ -557,8 +554,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, } ttm_bo_del_from_lru(bo); - if (!list_empty(&bo->ddestroy) && 
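The analogix_dp change above swaps the PSR spinlock for a mutex. Both lock sites run in process context (the encoder callback and the worker), and the worker ends up in analogix_dp_enable_psr()/analogix_dp_disable_psr(), which talk to the panel and may sleep, so a sleeping lock is the appropriate primitive and the irqsave dance was never needed. The resulting shape, sketched:

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(psr_lock);          /* was: spinlock_t + irqsave */
static unsigned int psr_state;
static struct work_struct psr_work;

static void psr_set(unsigned int state)
{
        mutex_lock(&psr_lock);
        psr_state = state;
        schedule_work(&psr_work);       /* the worker takes psr_lock too
                                         * and may sleep while holding it */
        mutex_unlock(&psr_lock);
}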
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d79607a1187c..c088703777e2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -150,8 +150,7 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	ttm_tt_destroy(bo->ttm);
 	atomic_dec(&bo->glob->bo_count);
 	dma_fence_put(bo->moving);
-	if (bo->resv == &bo->ttm_resv)
-		reservation_object_fini(&bo->ttm_resv);
+	reservation_object_fini(&bo->ttm_resv);
 	mutex_destroy(&bo->wu_mutex);
 	if (bo->destroy)
 		bo->destroy(bo);
@@ -402,14 +401,11 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 	if (bo->resv == &bo->ttm_resv)
 		return 0;
 
-	reservation_object_init(&bo->ttm_resv);
 	BUG_ON(!reservation_object_trylock(&bo->ttm_resv));
 
 	r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv);
-	if (r) {
+	if (r)
 		reservation_object_unlock(&bo->ttm_resv);
-		reservation_object_fini(&bo->ttm_resv);
-	}
 
 	return r;
 }
@@ -459,6 +455,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 			spin_unlock(&glob->lru_lock);
 			if (bo->resv != &bo->ttm_resv)
 				reservation_object_unlock(&bo->ttm_resv);
+
 			ttm_bo_cleanup_memtype_use(bo);
 			return;
 		}
@@ -557,8 +554,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 	}
 
 	ttm_bo_del_from_lru(bo);
-	if (!list_empty(&bo->ddestroy) && (bo->resv != &bo->ttm_resv))
-		reservation_object_fini(&bo->ttm_resv);
 	list_del_init(&bo->ddestroy);
 	kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
@@ -1207,8 +1202,8 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 		lockdep_assert_held(&bo->resv->lock.base);
 	} else {
 		bo->resv = &bo->ttm_resv;
-		reservation_object_init(&bo->ttm_resv);
 	}
+	reservation_object_init(&bo->ttm_resv);
 	atomic_inc(&bo->glob->bo_count);
 	drm_vma_node_reset(&bo->vma_node);
 	bo->priority = 0;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 78cb99be7146..e7a519f1849b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -474,6 +474,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
 	INIT_LIST_HEAD(&fbo->io_reserve_lru);
+	mutex_init(&fbo->wu_mutex);
 	fbo->moving = NULL;
 	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
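Taken together, the ttm_bo.c hunks make the embedded reservation object's lifetime unconditional: ttm_bo_init_reserved() now initializes bo->ttm_resv whether or not it is the resv actually in use, ttm_bo_release_list() finalizes it unconditionally, and the individualize/cleanup paths drop their conditional init/fini. The ttm_bo_util.c hunk applies the same always-initialize rule to the transfer ("ghost") object's wu_mutex, which release-path code such as mutex_destroy() otherwise operates on uninitialized. A standalone sketch of the invariant, in plain C with pthreads and hypothetical names (obj, embedded, cur), not TTM's own code:

/*
 * Sketch only: initialize the embedded member exactly once at
 * creation and finalize it exactly once at destruction, whether or
 * not it was ever the member in use.
 */
#include <stddef.h>
#include <pthread.h>

struct obj {
	pthread_mutex_t embedded;	/* plays the role of bo->ttm_resv */
	pthread_mutex_t *cur;		/* plays the role of bo->resv */
};

static void obj_init(struct obj *o, pthread_mutex_t *external)
{
	/* point at the caller's lock if one was supplied ... */
	o->cur = external ? external : &o->embedded;
	/* ... but initialize the embedded one unconditionally */
	pthread_mutex_init(&o->embedded, NULL);
}

static void obj_fini(struct obj *o)
{
	/* unconditional: pairs with the unconditional init above */
	pthread_mutex_destroy(&o->embedded);
}

The payoff is that intermediate states stop mattering: every teardown path runs the same fini, so no caller has to remember which resv the BO ended up using.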
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 4d688c8d7853..316f831ad5f0 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -329,7 +329,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free,
 	pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
 			GFP_KERNEL);
 	if (!pages_to_free) {
-		pr_err("Failed to allocate memory for pool free operation\n");
+		pr_debug("Failed to allocate memory for pool free operation\n");
 		return 0;
 	}
 
@@ -517,7 +517,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
 	caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
 
 	if (!caching_array) {
-		pr_err("Unable to allocate table for new pages\n");
+		pr_debug("Unable to allocate table for new pages\n");
 		return -ENOMEM;
 	}
 
@@ -525,7 +525,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
 		p = alloc_pages(gfp_flags, order);
 
 		if (!p) {
-			pr_err("Unable to get page %u\n", i);
+			pr_debug("Unable to get page %u\n", i);
 
 			/* store already allocated pages in the pool after
 			 * setting the caching state */
@@ -625,7 +625,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
 			++pool->nrefills;
 			pool->npages += alloc_size;
 		} else {
-			pr_err("Failed to fill pool (%p)\n", pool);
+			pr_debug("Failed to fill pool (%p)\n", pool);
 			/* If we have any pages left put them to the pool. */
 			list_for_each_entry(p, &new_pages, lru) {
 				++cpages;
@@ -885,8 +885,7 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
 		while (npages) {
 			p = alloc_page(gfp_flags);
 			if (!p) {
-
-				pr_err("Unable to allocate page\n");
+				pr_debug("Unable to allocate page\n");
 				return -ENOMEM;
 			}
 
@@ -925,7 +924,7 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
 		/* If there is any pages in the list put them back to
 		 * the pool.
 		 */
-		pr_err("Failed to allocate extra pages for large request\n");
+		pr_debug("Failed to allocate extra pages for large request\n");
 		ttm_put_pages(pages, count, flags, cstate);
 		return r;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 96ad12906621..6b2627fe9bc1 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -463,7 +463,7 @@ static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free,
 			GFP_KERNEL);
 
 	if (!pages_to_free) {
-		pr_err("%s: Failed to allocate memory for pool free operation\n",
+		pr_debug("%s: Failed to allocate memory for pool free operation\n",
 			pool->dev_name);
 		return 0;
 	}
@@ -755,7 +755,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 	caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
 	if (!caching_array) {
-		pr_err("%s: Unable to allocate table for new pages\n",
+		pr_debug("%s: Unable to allocate table for new pages\n",
 			pool->dev_name);
 		return -ENOMEM;
 	}
 
@@ -768,8 +768,8 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 	for (i = 0, cpages = 0; i < count; ++i) {
 		dma_p = __ttm_dma_alloc_page(pool);
 		if (!dma_p) {
-			pr_err("%s: Unable to get page %u\n",
-			       pool->dev_name, i);
+			pr_debug("%s: Unable to get page %u\n",
+				 pool->dev_name, i);
 
 			/* store already allocated pages in the pool after
 			 * setting the caching state */
@@ -855,8 +855,8 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
 		struct dma_page *d_page;
 		unsigned cpages = 0;
 
-		pr_err("%s: Failed to fill %s pool (r:%d)!\n",
-		       pool->dev_name, pool->name, r);
+		pr_debug("%s: Failed to fill %s pool (r:%d)!\n",
+			 pool->dev_name, pool->name, r);
 
 		list_for_each_entry(d_page, &d_pages, page_list) {
 			cpages++;
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 01a53ba304f8..98a6cb9f44fc 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -88,11 +88,11 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
 
 	mutex_lock(&vc4->purgeable.lock);
 	if (vc4->purgeable.num)
-		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
 			   vc4->purgeable.size / 1024, vc4->purgeable.num);
 
 	if (vc4->purgeable.purged_num)
-		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
 			   vc4->purgeable.purged_size / 1024,
 			   vc4->purgeable.purged_num);
 	mutex_unlock(&vc4->purgeable.lock);
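Two recurring fixes close out this range. The ttm_page_alloc*.c hunks demote allocation-failure messages from pr_err() to pr_debug(): the callers already propagate -ENOMEM or fall back to the pool, the page allocator prints its own warning for non-__GFP_NOWARN failures, and under memory pressure the old errors only flooded the log, while pr_debug() keeps the strings reachable through dynamic debug. The vc4_bo.c hunks fix a printf-style mismatch: purgeable.size and purgeable.purged_size are size_t, so they need the z length modifier rather than plain %d, which is undefined (and truncates) wherever size_t is wider than int. A userspace sketch of the same rule (hypothetical variable names):

/*
 * Sketch only: size_t takes the z length modifier (%zu unsigned,
 * %zd for the signed ssize_t); plain %d is an int and mismatches
 * size_t on 64-bit targets.
 */
#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t cached_bytes = 37 * 4096;	/* arbitrary example value */

	/* printf("%6dkb\n", cached_bytes / 1024);  wrong: %d vs. size_t */
	printf("%6zukb\n", cached_bytes / 1024);	/* matches size_t */
	return 0;
}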