diff options
| author | Dave Airlie <airlied@redhat.com> | 2019-02-11 14:04:05 +1000 | 
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2019-02-11 14:04:20 +1000 | 
| commit | f4bc54b532a62d8bee421ca06adb6d1b3e7ffaa9 (patch) | |
| tree | 3b835f9bed6bd236fa1a6d5d0add836f25ca8262 /drivers/gpu/drm/amd/amdgpu | |
| parent | 5ea3998d56346975c2701df18fb5b6e3ab5c8d9e (diff) | |
| parent | 0461221316ec21e0a535a35fba3feb6ba75706e6 (diff) | |
| download | blackbird-op-linux-f4bc54b532a62d8bee421ca06adb6d1b3e7ffaa9.tar.gz blackbird-op-linux-f4bc54b532a62d8bee421ca06adb6d1b3e7ffaa9.zip | |
Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next
Updates for 5.1:
- GDS fixes
- Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES interface
- GPUVM fixes
- PCIE DPM switching fixes for vega20
- Vega10 uclk DPM regression fix
- DC Freesync fixes
- DC ABM fixes
- Various DC cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208210214.27666-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 13 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 19 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 21 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 | 
12 files changed, 170 insertions, 51 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1c49b8266d69..52a5e4fdc95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs  		case AMDGPU_CHUNK_ID_DEPENDENCIES:  		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:  		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: +		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:  			break;  		default: @@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,  		fence = amdgpu_ctx_get_fence(ctx, entity,  					     deps[i].handle); + +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { +			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); +			struct dma_fence *old = fence; + +			fence = dma_fence_get(&s_fence->scheduled); +			dma_fence_put(old); +		} +  		if (IS_ERR(fence)) {  			r = PTR_ERR(fence);  			amdgpu_ctx_put(ctx); @@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,  		chunk = &p->chunks[i]; -		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || +		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {  			r = amdgpu_cs_process_fence_dep(p, chunk);  			if (r)  				return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 384272603b21..4f8fb4ecde34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3618,6 +3618,38 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. 
*/  	return r;  } +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, +						  enum pci_bus_speed *speed, +						  enum pcie_link_width *width) +{ +	struct pci_dev *pdev = adev->pdev; +	enum pci_bus_speed cur_speed; +	enum pcie_link_width cur_width; + +	*speed = PCI_SPEED_UNKNOWN; +	*width = PCIE_LNK_WIDTH_UNKNOWN; + +	while (pdev) { +		cur_speed = pcie_get_speed_cap(pdev); +		cur_width = pcie_get_width_cap(pdev); + +		if (cur_speed != PCI_SPEED_UNKNOWN) { +			if (*speed == PCI_SPEED_UNKNOWN) +				*speed = cur_speed; +			else if (cur_speed < *speed) +				*speed = cur_speed; +		} + +		if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { +			if (*width == PCIE_LNK_WIDTH_UNKNOWN) +				*width = cur_width; +			else if (cur_width < *width) +				*width = cur_width; +		} +		pdev = pci_upstream_bridge(pdev); +	} +} +  /**   * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot   * @@ -3630,8 +3662,8 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  {  	struct pci_dev *pdev; -	enum pci_bus_speed speed_cap; -	enum pcie_link_width link_width; +	enum pci_bus_speed speed_cap, platform_speed_cap; +	enum pcie_link_width platform_link_width;  	if (amdgpu_pcie_gen_cap)  		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  		return;  	} +	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) +		return; + +	amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, +					      &platform_link_width); +  	if (adev->pm.pcie_gen_mask == 0) {  		/* asic caps */  		pdev = adev->pdev; @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;  		}  		/* platform caps */ -		pdev = adev->ddev->pdev->bus->self; -		speed_cap = pcie_get_speed_cap(pdev); -		if (speed_cap == PCI_SPEED_UNKNOWN) { 
+		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {  			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |  						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);  		} else { -			if (speed_cap == PCIE_SPEED_16_0GT) +			if (platform_speed_cap == PCIE_SPEED_16_0GT)  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); -			else if (speed_cap == PCIE_SPEED_8_0GT) +			else if (platform_speed_cap == PCIE_SPEED_8_0GT)  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); -			else if (speed_cap == PCIE_SPEED_5_0GT) +			else if (platform_speed_cap == PCIE_SPEED_5_0GT)  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);  			else @@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  		}  	}  	if (adev->pm.pcie_mlw_mask == 0) { -		pdev = adev->ddev->pdev->bus->self; -		link_width = pcie_get_width_cap(pdev); -		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { +		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {  			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;  		} else { -			switch (link_width) { +			switch (platform_link_width) {  			case PCIE_LNK_X32:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a1bb3773087b..7f3aa7b7e1d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,9 +71,11 @@   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).   * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.   
* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES + * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID   */  #define KMS_DRIVER_MAJOR	3 -#define KMS_DRIVER_MINOR	27 +#define KMS_DRIVER_MINOR	29  #define KMS_DRIVER_PATCHLEVEL	0  int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index ecbcefe49a98..f89f5734d985 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -37,6 +37,8 @@ struct amdgpu_gds {  	struct amdgpu_gds_asic_info	mem;  	struct amdgpu_gds_asic_info	gws;  	struct amdgpu_gds_asic_info	oa; +	uint32_t			gds_compute_max_wave_id; +  	/* At present, GDS, GWS and OA resources for gfx (graphics)  	 * is always pre-allocated and available for graphics operation.  	 * Such resource is shared between all gfx clients. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f4f00217546e..d21dd2f369da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,  	memset(&bp, 0, sizeof(bp));  	*obj = NULL; -	/* At least align on page size */ -	if (alignment < PAGE_SIZE) { -		alignment = PAGE_SIZE; -	}  	bp.size = size;  	bp.byte_align = alignment; @@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  			return -EINVAL;  		}  		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; -		/* GDS allocations must be DW aligned */ -		if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS) -			size = ALIGN(size, 4);  	}  	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 728e15e5d68a..fd9c4beeaaa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  	size_t acc_size;  	int r; -	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; -	if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | -			  AMDGPU_GEM_DOMAIN_OA)) +	/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */ +	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { +		/* GWS and OA don't need any alignment. */ +		page_align = bp->byte_align;  		size <<= PAGE_SHIFT; -	else +	} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) { +		/* Both size and alignment must be a multiple of 4. */ +		page_align = ALIGN(bp->byte_align, 4); +		size = ALIGN(size, 4) << PAGE_SHIFT; +	} else { +		/* Memory should be aligned at least to a page size. */ +		page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;  		size = ALIGN(size, PAGE_SIZE); +	}  	if (!amdgpu_bo_validate_size(adev, size, bp->domain))  		return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b852abb9db0f..73e71e61dc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1756,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	}  	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, -				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, +				    4, AMDGPU_GEM_DOMAIN_GDS,  				    &adev->gds.gds_gfx_bo, NULL, NULL);  	if (r)  		return r; @@ -1769,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	}  	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, -				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, +				    1, AMDGPU_GEM_DOMAIN_GWS,  				    &adev->gds.gws_gfx_bo, NULL, NULL);  	if (r)  		return r; @@ -1782,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	}  	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, -				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, +				    1, 
AMDGPU_GEM_DOMAIN_OA,  				    &adev->gds.oa_gfx_bo, NULL, NULL);  	if (r)  		return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0bc6f553dc08..75481cf3348f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {  	 * DMA addresses to use for mapping, used during VM update by CPU  	 */  	dma_addr_t *pages_addr; - -	/** -	 * @kptr: -	 * -	 * Kernel pointer of PD/PT BO that needs to be updated, -	 * used during VM update by CPU -	 */ -	void *kptr;  };  /** @@ -1789,13 +1781,20 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  		if (pages_addr)  			params.src = ~0; -		/* Wait for PT BOs to be free. PTs share the same resv. object +		/* Wait for PT BOs to be idle. PTs share the same resv. object  		 * as the root PD BO  		 */  		r = amdgpu_vm_wait_pd(adev, vm, owner);  		if (unlikely(r))  			return r; +		/* Wait for any BO move to be completed */ +		if (exclusive) { +			r = dma_fence_wait(exclusive, true); +			if (unlikely(r)) +				return r; +		} +  		params.func = amdgpu_vm_cpu_set_ptes;  		params.pages_addr = pages_addr;  		return amdgpu_vm_update_ptes(&params, start, last + 1, @@ -1809,13 +1808,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	/*  	 * reserve space for two commands every (1 << BLOCK_SIZE)  	 *  entries or 2k dwords (whatever is smaller) -         * -         * The second command is for the shadow pagetables.  	 */ +	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); + +	/* The second command is for the shadow pagetables. */  	if (vm->root.base.bo->shadow) -		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; -	else -		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); +		ncmds *= 2;  	/* padding, etc. 
*/  	ndw = 64; @@ -1834,10 +1832,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  		ndw += ncmds * 10;  		/* extra commands for begin/end fragments */ +		ncmds = 2 * adev->vm_manager.fragment_size;  		if (vm->root.base.bo->shadow) -		        ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; -		else -		        ndw += 2 * 10 * adev->vm_manager.fragment_size; +			ncmds *= 2; + +		ndw += 10 * ncmds;  		params.func = amdgpu_vm_do_set_ptes;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0d90672d0e58..407dd16cc35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -125,7 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  	if (!hive) {  		ret = -EINVAL;  		dev_err(adev->dev, -			"XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n", +			"XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",  			adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);  		goto exit;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7984292f9282..a59e0fdf5a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,  	unsigned vmid = AMDGPU_JOB_GET_VMID(job);  	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); +	/* Currently, there is a high possibility to get wave ID mismatch +	 * between ME and GDS, leading to a hw deadlock, because ME generates +	 * different wave IDs than the GDS expects. This situation happens +	 * randomly when at least 5 compute pipes use GDS ordered append. +	 * The wave IDs generated by ME are also wrong after suspend/resume. +	 * Those are probably bugs somewhere else in the kernel driver. +	 * +	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and +	 * GDS to 0 for this ring (me/pipe). 
+	 */ +	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { +		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); +		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); +		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); +	} +  	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));  	amdgpu_ring_write(ring,  #ifdef __BIG_ENDIAN @@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {  		7 + /* gfx_v7_0_ring_emit_pipeline_sync */  		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */  		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ -	.emit_ib_size =	4, /* gfx_v7_0_ring_emit_ib_compute */ +	.emit_ib_size =	7, /* gfx_v7_0_ring_emit_ib_compute */  	.emit_ib = gfx_v7_0_ring_emit_ib_compute,  	.emit_fence = gfx_v7_0_ring_emit_fence_compute,  	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, @@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)  	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);  	adev->gds.gws.total_size = 64;  	adev->gds.oa.total_size = 16; +	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);  	if (adev->gds.mem.total_size == 64 * 1024) {  		adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a26747681ed6..b8e50a34bdb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6084,6 +6084,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,  	unsigned vmid = AMDGPU_JOB_GET_VMID(job);  	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); +	/* Currently, there is a high possibility to get wave ID mismatch +	 * between ME and GDS, leading to a hw deadlock, because ME generates +	 * different wave IDs than the GDS expects. 
This situation happens +	 * randomly when at least 5 compute pipes use GDS ordered append. +	 * The wave IDs generated by ME are also wrong after suspend/resume. +	 * Those are probably bugs somewhere else in the kernel driver. +	 * +	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and +	 * GDS to 0 for this ring (me/pipe). +	 */ +	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { +		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); +		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); +		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); +	} +  	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));  	amdgpu_ring_write(ring,  #ifdef __BIG_ENDIAN @@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {  		7 + /* gfx_v8_0_ring_emit_pipeline_sync */  		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */  		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ -	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */ +	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */  	.emit_ib = gfx_v8_0_ring_emit_ib_compute,  	.emit_fence = gfx_v8_0_ring_emit_fence_compute,  	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, @@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {  		7 + /* gfx_v8_0_ring_emit_pipeline_sync */  		17 + /* gfx_v8_0_ring_emit_vm_flush */  		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ -	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */ +	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */  	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,  	.test_ring = gfx_v8_0_ring_test_ring,  	.insert_nop = amdgpu_ring_insert_nop, @@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)  	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);  	adev->gds.gws.total_size = 64;  	adev->gds.oa.total_size = 16; +	
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);  	if (adev->gds.mem.total_size == 64 * 1024) {  		adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 262ee3cf6f1c..5533f6e4f4a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4010,6 +4010,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,  	unsigned vmid = AMDGPU_JOB_GET_VMID(job);  	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); +	/* Currently, there is a high possibility to get wave ID mismatch +	 * between ME and GDS, leading to a hw deadlock, because ME generates +	 * different wave IDs than the GDS expects. This situation happens +	 * randomly when at least 5 compute pipes use GDS ordered append. +	 * The wave IDs generated by ME are also wrong after suspend/resume. +	 * Those are probably bugs somewhere else in the kernel driver. +	 * +	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and +	 * GDS to 0 for this ring (me/pipe). 
+	 */ +	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { +		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); +		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); +		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); +	} +  	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));  	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */  	amdgpu_ring_write(ring, @@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {  		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +  		2 + /* gfx_v9_0_ring_emit_vm_flush */  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ -	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */ +	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */  	.emit_ib = gfx_v9_0_ring_emit_ib_compute,  	.emit_fence = gfx_v9_0_ring_emit_fence,  	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, @@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {  		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +  		2 + /* gfx_v9_0_ring_emit_vm_flush */  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ -	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */ +	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */  	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,  	.test_ring = gfx_v9_0_ring_test_ring,  	.insert_nop = amdgpu_ring_insert_nop, @@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)  		break;  	} +	switch (adev->asic_type) { +	case CHIP_VEGA10: +	case CHIP_VEGA20: +		adev->gds.gds_compute_max_wave_id = 0x7ff; +		break; +	case CHIP_VEGA12: +		adev->gds.gds_compute_max_wave_id = 0x27f; +		break; +	case CHIP_RAVEN: +		if (adev->rev_id >= 0x8) +			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ +		else +			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ +		break; +	default: +		/* this really depends on the chip */ +		adev->gds.gds_compute_max_wave_id = 0x7ff; +		break; +	} +  	
adev->gds.gws.total_size = 64;  	adev->gds.oa.total_size = 16; | 

