diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 07:59:23 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 07:59:23 -0700 |
| commit | 320b164abb32db876866a4ff8c2cb710524ac6ea (patch) | |
| tree | 1f79119cde6e24c9f1d01fb1e51252bca7c4cdd5 /drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |
| parent | 0adb32858b0bddf4ada5f364a84ed60b196dbcda (diff) | |
| parent | 694f54f680f7fd8e9561928fbfc537d9afbc3d79 (diff) | |
| download | talos-op-linux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.gz talos-op-linux-320b164abb32db876866a4ff8c2cb710524ac6ea.zip | |
Merge tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"Cannonlake and Vega12 support are probably the two major things. This
pull lacks nouveau, Ben had some unforeseen leave and a few other
blockers so we'll see how things look or maybe leave it for this merge
window.
core:
- Device links to handle sound/gpu pm dependency
- Color encoding/range properties
- Plane clipping into plane check helper
- Backlight helpers
- DP TP4 + HBR3 helper support
amdgpu:
- Vega12 support
- Enable DC by default on all supported GPUs
- Powerplay restructuring and cleanup
- DC bandwidth calc updates
- DC backlight on pre-DCE11
- TTM backing store dropping support
- SR-IOV fixes
- Adding "wattman" like functionality
- DC crc support
- Improved DC dual-link handling
amdkfd:
- GPUVM support for dGPU
- KFD events for dGPU
- Enable PCIe atomics for dGPUs
- HSA process eviction support
- Live-lock fixes for process eviction
- VM page table allocation fix for large-bar systems
panel:
- Raydium RM68200
- AUO G104SN02 V2
- KEO TX31D200VM0BAA
- ARM Versatile panels
i915:
- Cannonlake support enabled
- AUX-F port support added
- Icelake base enabling until internal milestone of forcewake support
- Query uAPI interface (used for GPU topology information currently)
- Compressed framebuffer support for sprites
- kmem cache shrinking when GPU is idle
- Avoid boosting GPU when waited item is being processed already
- Avoid retraining LSPCON link unnecessarily
- Decrease request signaling latency
- Deprecation of I915_SET_COLORKEY_NONE
- Kerneldoc and compiler warning cleanup for upcoming CI enforcements
- Full range ycbcr toggling
- HDCP support
i915/gvt:
- Big refactor for shadow ppgtt
- KBL context save/restore via LRI cmd (Weinan)
- Properly unmap dma for guest page (Changbin)
vmwgfx:
- Lots of various improvements
etnaviv:
- Use the drm gpu scheduler
- prep work for GC7000L support
vc4:
- fix alpha blending
- Expose perf counters to userspace
pl111:
- Bandwidth checking/limiting
- Versatile panel support
sun4i:
- A83T HDMI support
- A80 support
- YUV plane support
- H3/H5 HDMI support
omapdrm:
- HPD support for DVI connector
- remove lots of static variables
msm:
- DSI updates from 10nm / SDM845
- fix for race condition with a3xx/a4xx fence completion irq
- some refactoring/prep work for eventual a6xx support (ie. when we
have a userspace)
- a5xx debugfs enhancements
- some mdp5 fixes/cleanups to prepare for eventually merging
writeback support (ie. when we have a userspace)
tegra:
- mmap() fixes for fbdev devices
- Overlay plane for hw cursor fix
- dma-buf cache maintenance support
mali-dp:
- YUV->RGB conversion support
rockchip:
- rk3399/chromebook fixes and improvements
rcar-du:
- LVDS support move to drm bridge
- DT bindings for R8A77995
- Driver/DT support for R8A77970
tilcdc:
- DRM panel support"
* tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux: (1646 commits)
drm/i915: Fix hibernation with ACPI S0 target state
drm/i915/execlists: Use a locked clear_bit() for synchronisation with interrupt
drm/i915: Specify which engines to reset following semaphore/event lockups
drm/i915/dp: Write to SET_POWER dpcd to enable MST hub.
drm/amdkfd: Use ordered workqueue to restore processes
drm/amdgpu: Fix acquiring VM on large-BAR systems
drm/amd/pp: clean header file hwmgr.h
drm/amd/pp: use mlck_table.count for array loop index limit
drm: Fix uabi regression by allowing garbage mode->type from userspace
drm/amdgpu: Add an ATPX quirk for hybrid laptop
drm/amdgpu: fix spelling mistake: "asssert" -> "assert"
drm/amd/pp: Add new asic support in pp_psm.c
drm/amd/pp: Clean up powerplay code on Vega12
drm/amd/pp: Add smu irq handlers for legacy asics
drm/amd/pp: Fix set wrong temperature range on smu7
drm/amdgpu: Don't change preferred domian when fallback GTT v5
drm/vmwgfx: Bump version patchlevel and date
drm/vmwgfx: use monotonic event timestamps
drm/vmwgfx: Unpin the screen object backup buffer when not used
drm/vmwgfx: Stricter count of legacy surface device resources
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 181 |
1 files changed, 115 insertions, 66 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b7e7af09ead..e687363900bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -21,6 +21,7 @@ * */ #include <linux/firmware.h> +#include <drm/drm_cache.h> #include "amdgpu.h" #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" @@ -33,6 +34,7 @@ #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" #include "athub/athub_1_0_offset.h" +#include "oss/osssys_4_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -262,10 +264,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n", + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, - entry->pas_id); + entry->pasid); dev_err(adev->dev, " at page 0x%016llx from %d\n", addr, entry->client_id); if (!amdgpu_sriov_vf(adev)) @@ -284,8 +286,8 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->mc.vm_fault.num_types = 1; - adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) @@ -315,24 +317,21 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) */ /** - * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback + * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback * * @adev: amdgpu_device pointer * @vmid: vm instance to flush * * Flush the TLB for the requested page table. 
*/ -static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) { /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - /* flush hdp cache */ - adev->nbio_funcs->hdp_flush(adev); - - spin_lock(&adev->mc.invalidate_lock); + spin_lock(&adev->gmc.invalidate_lock); for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; @@ -365,11 +364,52 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } - spin_unlock(&adev->mc.invalidate_lock); + spin_unlock(&adev->gmc.invalidate_lock); +} + +static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; + uint32_t req = gmc_v9_0_get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + + amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags); + pd_addr |= flags; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + + /* wait for the invalidate to complete */ + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, + 1 << vmid, 1 << vmid); + + return pd_addr; +} + +static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); } /** - * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO + * gmc_v9_0_set_pte_pde 
- update the page tables using MMIO * * @adev: amdgpu_device pointer * @cpu_pt_addr: cpu address of the page table @@ -379,11 +419,9 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, * * Update the page tables using the CPU. */ -static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev, - void *cpu_pt_addr, - uint32_t gpu_page_idx, - uint64_t addr, - uint64_t flags) +static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -474,10 +512,10 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, { if (!(*flags & AMDGPU_PDE_PTE)) *addr = adev->vm_manager.vram_base_offset + *addr - - adev->mc.vram_start; + adev->gmc.vram_start; BUG_ON(*addr & 0xFFFF00000000003FULL); - if (!adev->mc.translate_further) + if (!adev->gmc.translate_further) return; if (level == AMDGPU_VM_PDB1) { @@ -493,34 +531,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } -static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { - .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v9_0_gart_set_pte_pde, - .get_invalidate_req = gmc_v9_0_get_invalidate_req, +static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, + .set_pte_pde = gmc_v9_0_set_pte_pde, .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, .get_vm_pde = gmc_v9_0_get_vm_pde }; -static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) +static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; - gmc_v9_0_set_gart_funcs(adev); + gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); - adev->mc.shared_aperture_start = 0x2000000000000000ULL; - adev->mc.shared_aperture_end = - adev->mc.shared_aperture_start + (4ULL << 30) - 1; - adev->mc.private_aperture_start = - adev->mc.shared_aperture_end + 1; - adev->mc.private_aperture_end = - adev->mc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = + adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; return 0; } @@ -646,16 +685,16 @@ static int gmc_v9_0_late_init(void *handle) } } - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); } static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) @@ -679,8 +718,9 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int chansize, numchan; int r; - adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - if (!adev->mc.vram_width) { + if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + if (!adev->gmc.vram_width) { /* hbm memory channel size */ if (adev->flags & AMD_IS_APU) chansize = 64; @@ -720,43 +760,50 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) numchan = 2; break; } - adev->mc.vram_width = numchan * chansize; + adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->mc.mc_vram_size = + 
adev->gmc.mc_vram_size = adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; - adev->mc.real_vram_size = adev->mc.mc_vram_size; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; } - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 + if (adev->flags & AMD_IS_APU) { + adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif /* In case the PCI BAR is larger than the actual amount of vram */ - adev->mc.visible_vram_size = adev->mc.aper_size; - if (adev->mc.visible_vram_size > adev->mc.real_vram_size) - adev->mc.visible_vram_size = adev->mc.real_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_VEGA10: /* all engines support GPUVM */ + case CHIP_VEGA12: /* all engines support GPUVM */ default: - adev->mc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 512ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ - adev->mc.gart_size = 1024ULL << 20; + adev->gmc.gart_size = 1024ULL << 20; break; } } else { - adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; } - gmc_v9_0_vram_gtt_location(adev, &adev->mc); + gmc_v9_0_vram_gtt_location(adev, &adev->gmc); return 0; } @@ -788,23 +835,22 @@ static int gmc_v9_0_sw_init(void *handle) gfxhub_v1_0_init(adev); mmhub_v1_0_init(adev); - spin_lock_init(&adev->mc.invalidate_lock); + spin_lock_init(&adev->gmc.invalidate_lock); + adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch 
(adev->asic_type) { case CHIP_RAVEN: - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { /* vm_size is 128TB + 512GB for legacy 3-level page support */ amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48); - adev->mc.translate_further = + adev->gmc.translate_further = adev->vm_manager.num_level > 1; } break; case CHIP_VEGA10: - /* XXX Don't know how to get VRAM type yet. */ - adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; + case CHIP_VEGA12: /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -817,10 +863,10 @@ static int gmc_v9_0_sw_init(void *handle) } /* This interrupt is VMC page fault.*/ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, - &adev->mc.vm_fault); - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0, - &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0, + &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0, + &adev->gmc.vm_fault); if (r) return r; @@ -829,13 +875,13 @@ static int gmc_v9_0_sw_init(void *handle) * This is the max address of the GPU's * internal address space. */ - adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ /* * It needs to reserve 8M stolen memory for vega10 * TODO: Figure out how to avoid that... */ - adev->mc.stolen_size = 8 * 1024 * 1024; + adev->gmc.stolen_size = 8 * 1024 * 1024; /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. 
@@ -855,6 +901,7 @@ static int gmc_v9_0_sw_init(void *handle) pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); } + adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); r = gmc_v9_0_mc_init(adev); if (r) @@ -920,6 +967,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); break; + case CHIP_VEGA12: + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, @@ -976,7 +1025,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* After HDP is initialized, flush HDP.*/ - adev->nbio_funcs->hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -985,10 +1034,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gfxhub_v1_0_set_fault_enable_default(adev, value); mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_gart_flush_gpu_tlb(adev, 0); + gmc_v9_0_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gart_size >> 20), + (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -1039,7 +1088,7 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v9_0_gart_disable(adev); return 0; |

