Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 181
1 file changed, 116 insertions, 65 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c3028722d4e3..0b81e0b64393 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -32,6 +32,7 @@
 #include <linux/stop_machine.h>
 
 #include <asm/set_memory.h>
+#include <asm/smp.h>
 
 #include <drm/i915_drm.h>
 
@@ -42,7 +43,6 @@
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
-#include "intel_drv.h"
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
@@ -120,7 +120,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma);
 
 static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-	struct intel_uncore *uncore = &ggtt->vm.i915->uncore;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
 	/*
 	 * Note that as an uncached mmio write, this will flush the
@@ -131,7 +131,7 @@ static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-	struct intel_uncore *uncore = &ggtt->vm.i915->uncore;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
 	gen6_ggtt_invalidate(ggtt);
 	intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
@@ -772,7 +772,8 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	       struct i915_page_dma * const to,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
-	GEM_BUG_ON(atomic_read(px_used(pd)) > ARRAY_SIZE(pd->entry));
+	/* Each thread pre-pins the pd, and we may have a thread per pde. */
+	GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry));
 
 	atomic_inc(px_used(pd));
 	pd->entry[idx] = to;
@@ -911,6 +912,23 @@ static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
 	return (vm->total + (1ull << shift) - 1) >> shift;
 }
 
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (vm->top == 2)
+		return ppgtt->pd;
+	else
+		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
+{
+	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
+
 static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
 				 struct i915_page_directory *pd,
 				 int count, int lvl)
@@ -947,8 +965,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
 	unsigned int idx, len;
 
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
 	len = gen8_pd_range(start, end, lvl--, &idx);
-	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
 	    __func__, vm, lvl + 1, start, end, idx, len,
 	    atomic_read(px_used(pd)));
 	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
@@ -974,7 +994,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 			u64 *vaddr;
 
 			count = gen8_pt_count(start, end);
-			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n",
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
 			    __func__, vm, lvl, start, end,
 			    gen8_pd_index(start, 0), count,
 			    atomic_read(&pt->used));
@@ -1002,6 +1022,7 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
 {
 	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
 	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
 
 	start >>= GEN8_PTE_SHIFT;
 	length >>= GEN8_PTE_SHIFT;
@@ -1013,15 +1034,17 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
 
 static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
 			      struct i915_page_directory * const pd,
-			      u64 * const start, u64 end, int lvl)
+			      u64 * const start, const u64 end, int lvl)
 {
 	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
 	struct i915_page_table *alloc = NULL;
 	unsigned int idx, len;
 	int ret = 0;
 
+	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
 	len = gen8_pd_range(*start, end, lvl--, &idx);
-	DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
 	    __func__, vm, lvl + 1, *start, end, idx, len,
 	    atomic_read(px_used(pd)));
 	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
@@ -1087,13 +1110,14 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
 		} else {
 			unsigned int count = gen8_pt_count(*start, end);
 
-			DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n",
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
 			    __func__, vm, lvl, *start, end,
 			    gen8_pd_index(*start, 0), count,
 			    atomic_read(&pt->used));
 
 			atomic_add(count, &pt->used);
-			GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES);
+			/* All other pdes may be simultaneously removed */
+			GEM_BUG_ON(atomic_read(&pt->used) > 2 * I915_PDES);
 			*start += count;
 		}
 	} while (idx++, --len);
@@ -1112,6 +1136,7 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
 
 	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
 	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(range_overflows(start, length, vm->total));
 
 	start >>= GEN8_PTE_SHIFT;
 	length >>= GEN8_PTE_SHIFT;
@@ -1137,12 +1162,12 @@ static inline struct sgt_dma {
 }
 
 static __always_inline u64
-gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
-			      struct i915_page_directory *pdp,
-			      struct sgt_dma *iter,
-			      u64 idx,
-			      enum i915_cache_level cache_level,
-			      u32 flags)
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+		      struct i915_page_directory *pdp,
+		      struct sgt_dma *iter,
+		      u64 idx,
+		      enum i915_cache_level cache_level,
+		      u32 flags)
 {
 	struct i915_page_directory *pd;
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
@@ -1183,35 +1208,21 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 	return idx;
 }
 
-static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
-				   struct i915_vma *vma,
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+				   struct sgt_dma *iter,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct sgt_dma iter = sgt_dma(vma);
-
-	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
-				      vma->node.start >> GEN8_PTE_SHIFT,
-				      cache_level, flags);
-
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-}
-
-static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
-					   struct i915_page_directory *pml4,
-					   struct sgt_dma *iter,
-					   enum i915_cache_level cache_level,
-					   u32 flags)
-{
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	u64 start = vma->node.start;
 	dma_addr_t rem = iter->sg->length;
 
+	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
 	do {
-		struct i915_page_directory *pdp =
-			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
-		struct i915_page_directory *pd =
+		struct i915_page_directory * const pdp =
+			gen8_pdp_for_page_address(vma->vm, start);
+		struct i915_page_directory * const pd =
 			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
 		gen8_pte_t encode = pte_encode;
 		unsigned int maybe_64K = -1;
@@ -1317,26 +1328,26 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 	} while (iter->sg);
 }
 
-static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
-				   struct i915_vma *vma,
-				   enum i915_cache_level cache_level,
-				   u32 flags)
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+			      struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
 
 	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
-					       flags);
-	} else {
+		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+	} else  {
 		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
 
-		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
-							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
-							    &iter, idx, cache_level,
-							    flags)))
-			;
+		do {
+			struct i915_page_directory * const pdp =
+				gen8_pdp_for_page_index(vm, idx);
+
+			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+						    cache_level, flags);
+		} while (idx);
 
 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 	}
@@ -1495,18 +1506,15 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 		goto err_free_scratch;
 	}
 
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
-	} else {
+	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
 			if (err)
 				goto err_free_pd;
 		}
-
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
+	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 
@@ -1868,7 +1876,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
@@ -2036,6 +2043,27 @@ static void gtt_write_workarounds(struct intel_gt *gt)
 				 GEN8_GAMW_ECO_DEV_RW_IA,
 				 0,
 				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+	if (IS_GEN_RANGE(i915, 8, 11)) {
+		bool can_use_gtt_cache = true;
+
+		/*
+		 * According to the BSpec if we use 2M/1G pages then we also
+		 * need to disable the GTT cache. At least on BDW we can see
+		 * visual corruption when using 2M pages, and not disabling the
+		 * GTT cache.
+		 */
+		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+			can_use_gtt_cache = false;
+
+		/* WaGttCachingOffByDefault */
+		intel_uncore_write(uncore,
+				   HSW_GTT_CACHE_EN,
+				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+		WARN_ON_ONCE(can_use_gtt_cache &&
+			     intel_uncore_read(uncore,
+					       HSW_GTT_CACHE_EN) == 0);
+	}
 }
 
 int i915_ppgtt_init_hw(struct intel_gt *gt)
@@ -2843,6 +2871,19 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	return 0;
 }
 
+static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+	I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+	I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+	I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+	I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+	I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+	I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+	I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
 static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
@@ -2923,7 +2964,9 @@ static void setup_private_pat(struct drm_i915_private *dev_priv)
 {
 	GEM_BUG_ON(INTEL_GEN(dev_priv) < 8);
 
-	if (INTEL_GEN(dev_priv) >= 10)
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_setup_private_ppat(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 10)
 		cnl_setup_private_ppat(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
 		chv_setup_private_ppat(dev_priv);
@@ -3085,7 +3128,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.clear_pages = clear_pages;
 
 	if (unlikely(ggtt->do_idle_maps))
-		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
+		dev_notice(dev_priv->drm.dev,
+			   "Applying Ironlake quirks for intel_iommu\n");
 
 	return 0;
 }
@@ -3146,7 +3190,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 		return ret;
 
 	if (intel_vtd_active())
-		DRM_INFO("VT-d active for gfx access\n");
+		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
 
 	return 0;
 }
@@ -3254,6 +3298,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
 static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 {
 	struct i915_vma *vma, *vn;
+	bool flush = false;
 
 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 
@@ -3278,10 +3323,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
-		if (obj) {
-			i915_gem_object_lock(obj);
-			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
-			i915_gem_object_unlock(obj);
+		if (obj) { /* only used during resume => exclusive access */
+			flush |= fetch_and_zero(&obj->write_domain);
+			obj->read_domains |= I915_GEM_DOMAIN_GTT;
 		}
 
 lock:
@@ -3292,6 +3336,9 @@ lock:
 	ggtt->invalidate(ggtt);
 
 	mutex_unlock(&ggtt->vm.mutex);
+
+	if (flush)
+		wbinvd_on_all_cpus();
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
@@ -3728,7 +3775,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (flags & PIN_NOEVICT)
 		return -ENOSPC;
 
-	/* No free space, pick a slot at random.
+	/*
+	 * No free space, pick a slot at random.
 	 *
 	 * There is a pathological case here using a GTT shared between
 	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
@@ -3756,6 +3804,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	if (flags & PIN_NOSEARCH)
+		return -ENOSPC;
+
 	/* Randomly selected placement is pinned, do a search */
 	err = i915_gem_evict_something(vm, size, alignment, color,
 				       start, end, flags);