diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 196 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 7 | 
2 files changed, 109 insertions, 94 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 271452d3999a..8b47484e169a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -33,56 +33,34 @@  static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)  { -	u32 reg; -	int timeout = AI_MAILBOX_TIMEDOUT; -	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); - -	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -					     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1); -	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -				       mmBIF_BX_PF0_MAILBOX_CONTROL), reg); - -	/*Wait for RCV_MSG_VALID to be 0*/ -	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -					     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	while (reg & mask) { -		if (timeout <= 0) { -			pr_err("RCV_MSG_VALID is not cleared\n"); -			break; -		} -		mdelay(1); -		timeout -=1; - -		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -						     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	} +	WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);  }  static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)  { -	u32 reg; +	WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0); +} -	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -					     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, -			    TRN_MSG_VALID, val ? 1 : 0); -	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL), -		      reg); +/* + * this peek_msg should *only* be called in the IRQ routine because in the IRQ routine + * the RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if called outside the IRQ routine, this peek_msg is not guaranteed to return the + * correct value since it returns RCV_DW0 without checking that + * RCV_MSG_VALID is set by host. 
+ */ +static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev) +{ +	return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, +				mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));  } +  static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,  				   enum idh_event event)  {  	u32 reg; -	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); - -	if (event != IDH_FLR_NOTIFICATION_CMPL) { -		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -						     mmBIF_BX_PF0_MAILBOX_CONTROL)); -		if (!(reg & mask)) -			return -ENOENT; -	}  	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,  					     mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0)); @@ -94,54 +72,67 @@ static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,  	return 0;  } +static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev) { +	return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2; +} +  static int xgpu_ai_poll_ack(struct amdgpu_device *adev)  { -	int r = 0, timeout = AI_MAILBOX_TIMEDOUT; -	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK); -	u32 reg; +	int timeout  = AI_MAILBOX_POLL_ACK_TIMEDOUT; +	u8 reg; + +	do { +		reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE); +		if (reg & 2) +			return 0; -	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -					     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	while (!(reg & mask)) { -		if (timeout <= 0) { -			pr_err("Doesn't get ack from pf.\n"); -			r = -ETIME; -			break; -		}  		mdelay(5);  		timeout -= 5; +	} while (timeout > 1); -		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -						     mmBIF_BX_PF0_MAILBOX_CONTROL)); -	} +	pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT); -	return r; +	return -ETIME;  }  static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)  { -	int r = 0, timeout = AI_MAILBOX_TIMEDOUT; - -	r = xgpu_ai_mailbox_rcv_msg(adev, event); -	while (r) { -		if (timeout <= 0) { -			pr_err("Doesn't get msg:%d from pf.\n", event); -			r = -ETIME; -			break; -		} -		mdelay(5); -		
timeout -= 5; +	int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT; +	do {  		r = xgpu_ai_mailbox_rcv_msg(adev, event); -	} +		if (!r) +			return 0; -	return r; +		msleep(10); +		timeout -= 10; +	} while (timeout > 1); + +	pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + +	return -ETIME;  }  static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,  	      enum idh_request req, u32 data1, u32 data2, u32 data3) {  	u32 reg;  	int r; +	uint8_t trn; + +	/* IMPORTANT: +	 * clear TRN_MSG_VALID to clear host's RCV_MSG_ACK +	 * and with host's RCV_MSG_ACK cleared hw automatically clears host's RCV_MSG_ACK +	 * which leads to VF's TRN_MSG_ACK cleared, otherwise below xgpu_ai_poll_ack() +	 * will return immediately +	 */ +	do { +		xgpu_ai_mailbox_set_valid(adev, false); +		trn = xgpu_ai_peek_ack(adev); +		if (trn) { +			pr_err("trn=%x ACK should not asssert! wait again !\n", trn); +			msleep(1); +		} +	} while(trn);  	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,  					     mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); @@ -245,15 +236,36 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)  {  	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);  	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - -	/* wait until RCV_MSG become 3 */ -	if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { -		pr_err("failed to recieve FLR_CMPL\n"); -		return; -	} - -	/* Trigger recovery due to world switch failure */ -	amdgpu_device_gpu_recover(adev, NULL, false); +	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; +	int locked; + +	/* block amdgpu_gpu_recover till msg FLR COMPLETE received, +	 * otherwise the mailbox msg will be ruined/reset by +	 * the VF FLR. +	 * +	 * we can unlock the lock_reset to allow "amdgpu_job_timedout" +	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received +	 * which means host side has finished this VF's FLR. 
+	 */ +	locked = mutex_trylock(&adev->lock_reset); +	if (locked) +		adev->in_gpu_reset = 1; + +	do { +		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) +			goto flr_done; + +		msleep(10); +		timeout -= 10; +	} while (timeout > 1); + +flr_done: +	if (locked) +		mutex_unlock(&adev->lock_reset); + +	/* Trigger recovery for world switch failure if no TDR */ +	if (amdgpu_lockup_timeout == 0) +		amdgpu_device_gpu_recover(adev, NULL, true);  }  static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -274,24 +286,22 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,  				   struct amdgpu_irq_src *source,  				   struct amdgpu_iv_entry *entry)  { -	int r; - -	/* trigger gpu-reset by hypervisor only if TDR disbaled */ -	if (!amdgpu_gpu_recovery) { -		/* see what event we get */ -		r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - -		/* sometimes the interrupt is delayed to inject to VM, so under such case -		 * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus -		 * above recieve message could be failed, we should schedule the flr_work -		 * anyway +	enum idh_event event = xgpu_ai_mailbox_peek_msg(adev); + +	switch (event) { +		case IDH_FLR_NOTIFICATION: +		if (amdgpu_sriov_runtime(adev)) +			schedule_work(&adev->virt.flr_work); +		break; +		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore +		 * it so far since that polling thread will handle it, +		 * other msg like flr complete is not handled here.  		 
*/ -		if (r) { -			DRM_ERROR("FLR_NOTIFICATION is missed\n"); -			xgpu_ai_mailbox_send_ack(adev); -		} - -		schedule_work(&adev->virt.flr_work); +		case IDH_CLR_MSG_BUF: +		case IDH_FLR_NOTIFICATION_CMPL: +		case IDH_READY_TO_ACCESS_GPU: +		default: +		break;  	}  	return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 67e78576a9eb..b4a9ceea334b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -24,7 +24,9 @@  #ifndef __MXGPU_AI_H__  #define __MXGPU_AI_H__ -#define AI_MAILBOX_TIMEDOUT	12000 +#define AI_MAILBOX_POLL_ACK_TIMEDOUT	500 +#define AI_MAILBOX_POLL_MSG_TIMEDOUT	12000 +#define AI_MAILBOX_POLL_FLR_TIMEDOUT	500  enum idh_request {  	IDH_REQ_GPU_INIT_ACCESS = 1, @@ -51,4 +53,7 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);  int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);  void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev); +#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 +#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1 +  #endif | 

