diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-01-20 00:13:22 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-01-20 00:13:22 +0000 |
| commit | 2e045bbc5f1001a908bfb9267b792bdc6dd72c5d (patch) | |
| tree | 008350af964d2965d3d1f18fa9bb3a7bf95927b8 | |
| parent | 69005960400a84f3c8b03f173afdad79ccd1d4a0 (diff) | |
| download | bcm5719-llvm-2e045bbc5f1001a908bfb9267b792bdc6dd72c5d.tar.gz bcm5719-llvm-2e045bbc5f1001a908bfb9267b792bdc6dd72c5d.zip | |
AMDGPU/SI: Prevent the DAGCombiner from creating setcc with i1 inputs
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15035
llvm-svn: 258256
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/setcc-opt.ll | 54 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/setcc.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll | 3 |
5 files changed, 81 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2c22cee4bf4..7ac55b87b84 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -533,6 +533,16 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } +bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const { + + // SimplifySetCC uses this function to determine whether or not it should + // create setcc with i1 operands. We don't have instructions for i1 setcc. + if (VT == MVT::i1 && Op == ISD::SETCC) + return false; + + return TargetLowering::isTypeDesirableForOp(Op, VT); +} + SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index f01b2c0d09f..47aa16ada32 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -89,6 +89,8 @@ public: bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + bool isTypeDesirableForOp(unsigned Op, EVT VT) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/test/CodeGen/AMDGPU/setcc-opt.ll b/llvm/test/CodeGen/AMDGPU/setcc-opt.ll index 63d74820f96..ae69f4170c6 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-opt.ll @@ -68,6 +68,34 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ret void } +; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1: +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, -1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1: +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, -1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0: ; GCN-NOT: v_cmp ; GCN: v_cmp_ne_i32_e32 vcc, @@ -123,6 +151,32 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ret void } +; Reduces to false: +; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1: +; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}} +; GCN: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, -1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; Reduces to true: +; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1: +; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}} +; GCN: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, -1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k: ; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll index f33a82df5ff..430989faf9b 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc.ll @@ -375,3 +375,17 @@ define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out ret void } + +; Make sure we don't try to emit i1 setcc ops +; FUNC-LABEL: setcc-i1 +; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1 +; SI: v_cmp_eq_i32_e64 s[0:1], 0, [[AND]] +define void @setcc-i1(i32 %in) { + %and = and i32 %in, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %endif, label %if +if: + unreachable +endif: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll index dac74728b3c..4663eb0b6b3 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll @@ -4,8 +4,7 @@ ; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0: ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] -; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}} -; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1{{$}} +; SI: v_cmp_eq_i32_e32 vcc, 0, [[TMP]]{{$}} ; SI: v_cndmask_b32_e64 ; SI: buffer_store_byte define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { |

