summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCarl Ritson <carl.ritson@amd.com>2019-03-08 09:03:11 +0000
committerCarl Ritson <carl.ritson@amd.com>2019-03-08 09:03:11 +0000
commit1a98dc184044768dd672c54b3b9557a05dd082f1 (patch)
tree9913f5ffc732c591a2066e520e3a1cde09ef7308
parent07ddb9d91ff6f05cdbf4639961c6e248c74a61d4 (diff)
downloadbcm5719-llvm-1a98dc184044768dd672c54b3b9557a05dd082f1.tar.gz
bcm5719-llvm-1a98dc184044768dd672c54b3b9557a05dd082f1.zip
[AMDGPU] V_CVT_F32_UBYTE{0,1,2,3} are full rate instructions
Summary: Fix a bug in the scheduling model where V_CVT_F32_UBYTE{0,1,2,3} are incorrectly marked as quarter rate instructions. Reviewers: arsenm, rampitec Reviewed By: rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59091 llvm-svn: 355671
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td7
-rw-r--r--llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/udivrem24.ll8
3 files changed, 10 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index aafe0339acf..85077beff69 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -185,13 +185,14 @@ defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
+} // End SchedRW = [WriteQuarterRate32]
+
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
-defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
-} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 4c41565dca9..6ffc9e2e513 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -116,8 +116,8 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)
; VI-DAG: v_add_u16_e32
; VI-DAG: v_add_u16_e32
-; GCN: {{buffer|flat}}_store_dwordx4
-; GCN: {{buffer|flat}}_store_dword
+; GCN-DAG: {{buffer|flat}}_store_dwordx4
+; GCN-DAG: {{buffer|flat}}_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
index 2c38f71f4b1..6e37578ef25 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
@@ -4,8 +4,8 @@
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
-; SI: v_cvt_f32_ubyte
-; SI: v_rcp_iflag_f32
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
@@ -176,8 +176,8 @@ define amdgpu_kernel void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addr
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
-; SI: v_cvt_f32_ubyte
-; SI: v_rcp_iflag_f32
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
OpenPOWER on IntegriCloud