-rw-r--r--  llvm/lib/Target/R600/SIInstructions.td    | 14
-rw-r--r--  llvm/test/CodeGen/R600/and.ll             | 23
-rw-r--r--  llvm/test/CodeGen/R600/bfi_int.ll         |  4
-rw-r--r--  llvm/test/CodeGen/R600/ctpop.ll           | 17
-rw-r--r--  llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll | 21
-rw-r--r--  llvm/test/CodeGen/R600/rsq.ll             | 10
-rw-r--r--  llvm/test/CodeGen/R600/trunc.ll           | 15

7 files changed, 88 insertions(+), 16 deletions(-)
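Background for the pattern changes below: on SI, the VOP2 (_e32) encodings can take an SGPR or inline constant only in src0, and src1 must be a VGPR, while the VOP3 (_e64) encodings accept a scalar operand in either source slot (still limited to one constant-bus read per instruction). Selecting the _e64 forms therefore avoids unencodable operands when a value arrives in an SGPR. A minimal sketch of the shape that needs this, in the same style as the tests in the patch; the function name is illustrative, and the usual llc -march=r600 -mcpu=SI RUN invocation is assumed rather than shown here:

; With the inline constant 1 in src0, an SGPR %a would have to go in src1,
; which the VOP2 (_e32) form cannot encode; only V_AND_B32_e64 (VOP3) can
; take an SGPR there, which is what the updated trunc pattern selects.
define void @i1_trunc_sgpr_sketch(i32 addrspace(1)* %out, i32 %a) {
  %trunc = trunc i32 %a to i1
  %result = select i1 %trunc, i32 1, i32 0
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}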
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 8082d3254ef..e4d7dc79be1 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1976,9 +1976,9 @@ class BinOp64Pat <SDNode node, Instruction inst> : Pat <
           (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
 >;
 
-def : BinOp64Pat <and, V_AND_B32_e32>;
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
+def : BinOp64Pat <and, V_AND_B32_e64>;
+def : BinOp64Pat <or, V_OR_B32_e64>;
+def : BinOp64Pat <xor, V_XOR_B32_e64>;
 
 class SextInReg <ValueType vt, int ShiftAmt> : Pat <
   (sext_inreg i32:$src0, vt),
@@ -1990,7 +1990,7 @@ def : SextInReg <i16, 16>;
 
 def : Pat <
   (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
-  (V_BCNT_U32_B32_e32 $popcnt, $val)
+  (V_BCNT_U32_B32_e64 $popcnt, $val)
 >;
 
 def : Pat <
@@ -2010,7 +2010,7 @@ def : Pat <
 
 def : Pat <
   (addc i32:$src0, i32:$src1),
-  (V_ADD_I32_e32 $src0, $src1)
+  (V_ADD_I32_e64 $src0, $src1)
 >;
 
 /********** ======================= **********/
@@ -3070,13 +3070,13 @@ def : Pat <
 
 def : Pat <
   (i1 (trunc i32:$a)),
-  (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+  (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
 >;
 
 //============================================================================//
 // Miscellaneous Optimization Patterns
 //============================================================================//
 
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
 
 } // End isSI predicate
diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll
index 7bbbec2d969..ccb97e38f3b 100644
--- a/llvm/test/CodeGen/R600/and.ll
+++ b/llvm/test/CodeGen/R600/and.ll
@@ -129,11 +129,30 @@ endif:
 }
 
 ; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: @v_and_inline_imm_i64
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_and_inline_imm_i64
+; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/bfi_int.ll b/llvm/test/CodeGen/R600/bfi_int.ll
index d18702a1de9..107158de54d 100644
--- a/llvm/test/CodeGen/R600/bfi_int.ll
+++ b/llvm/test/CodeGen/R600/bfi_int.ll
@@ -38,8 +38,8 @@ entry:
 
 ; R600-CHECK: @bfi_sha256_ma
 ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
 define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
diff --git a/llvm/test/CodeGen/R600/ctpop.ll b/llvm/test/CodeGen/R600/ctpop.ll
index c7c406a57e6..fd128672a17 100644
--- a/llvm/test/CodeGen/R600/ctpop.ll
+++ b/llvm/test/CodeGen/R600/ctpop.ll
@@ -42,8 +42,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
 ; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
 ; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
 ; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
 ; SI: BUFFER_STORE_DWORD [[RESULT]],
 ; SI: S_ENDPGM
 
@@ -59,6 +58,20 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
   ret void
 }
 
+; FUNC-LABEL: @v_ctpop_add_sgpr_i32
+; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
+; SI-NEXT: S_WAITCNT
+; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+  %val0 = load i32 addrspace(1)* %in0, align 4
+  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+  %add = add i32 %ctpop0, %sval
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @v_ctpop_v2i32:
 ; SI: V_BCNT_U32_B32_e32
 ; SI: V_BCNT_U32_B32_e32
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
index 27cf6b28fd6..449b3afc381 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -4,10 +4,29 @@
 declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
 
 ; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; EG: RECIPSQRT_IEEE
 define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
   %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
   store float %rsq, float addrspace(1)* %out, align 4
   ret void
 }
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: @rsq_f32_constant_4.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rsq_f32_constant_100.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/rsq.ll b/llvm/test/CodeGen/R600/rsq.ll
index 3069f62724b..a9f3013d3e4 100644
--- a/llvm/test/CodeGen/R600/rsq.ll
+++ b/llvm/test/CodeGen/R600/rsq.ll
@@ -26,3 +26,13 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
   store double %div, double addrspace(1)* %out, align 4
   ret void
 }
+
+; SI-LABEL: @rsq_f32_sgpr
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: S_ENDPGM
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/trunc.ll b/llvm/test/CodeGen/R600/trunc.ll
index 2f4b48236d4..d3b191db282 100644
--- a/llvm/test/CodeGen/R600/trunc.ll
+++ b/llvm/test/CodeGen/R600/trunc.ll
@@ -46,9 +46,20 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
 }
 
 ; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+  %a = load i32 addrspace(1)* %ptr, align 4
+  %trunc = trunc i32 %a to i1
+  %result = select i1 %trunc, i32 1, i32 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @sgpr_trunc_i32_to_i1:
+; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: V_CMP_EQ_I32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
   %trunc = trunc i32 %a to i1
   %result = select i1 %trunc, i32 1, i32 0
   store i32 %result, i32 addrspace(1)* %out, align 4
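A closing note on the BinOp64Pat change: SI has no 64-bit VALU bitwise instructions, so the pattern splits an i64 and/or/xor into two 32-bit operations on the sub0 and sub1 halves, which is why @v_and_constant_i64 above checks for two V_AND_B32 instructions. A hedged sketch of IR that exercises this path; the function name is illustrative, not from the patch:

; Each 64-bit operand is split into low/high VGPR halves, and each half
; selects its own V_AND_B32. The pattern now emits the _e64 form, and the
; updated checks above show it is shrunk back to _e32 once every operand
; is a VGPR.
define void @v_and_i64_sketch(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
  %a = load i64 addrspace(1)* %aptr, align 8
  %b = load i64 addrspace(1)* %bptr, align 8
  %and = and i64 %a, %b
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}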