summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2016-01-20 00:13:22 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2016-01-20 00:13:22 +0000
commit 2e045bbc5f1001a908bfb9267b792bdc6dd72c5d (patch)
tree 008350af964d2965d3d1f18fa9bb3a7bf95927b8
parent 69005960400a84f3c8b03f173afdad79ccd1d4a0 (diff)
download bcm5719-llvm-2e045bbc5f1001a908bfb9267b792bdc6dd72c5d.tar.gz
bcm5719-llvm-2e045bbc5f1001a908bfb9267b792bdc6dd72c5d.zip
AMDGPU/SI: Prevent the DAGCombiner from creating setcc with i1 inputs
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15035
llvm-svn: 258256
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/setcc-opt.ll 54
-rw-r--r-- llvm/test/CodeGen/AMDGPU/setcc.ll 14
-rw-r--r-- llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll 3
5 files changed, 81 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2c22cee4bf4..7ac55b87b84 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -533,6 +533,16 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return TII->isInlineConstant(Imm);
}
+bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
+
+ // SimplifySetCC uses this function to determine whether or not it should
+ // create setcc with i1 operands. We don't have instructions for i1 setcc.
+ if (VT == MVT::i1 && Op == ISD::SETCC)
+ return false;
+
+ return TargetLowering::isTypeDesirableForOp(Op, VT);
+}
+
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f01b2c0d09f..47aa16ada32 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -89,6 +89,8 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
+
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-opt.ll b/llvm/test/CodeGen/AMDGPU/setcc-opt.ll
index 63d74820f96..ae69f4170c6 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-opt.ll
@@ -68,6 +68,34 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
ret void
}
+; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
+define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, -1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
+define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, -1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_i32_e32 vcc,
@@ -123,6 +151,32 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
ret void
}
+; Reduces to false:
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
+; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, -1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; Reduces to true:
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
+; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
+define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, -1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index f33a82df5ff..430989faf9b 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -375,3 +375,17 @@ define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra,
store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
ret void
}
+
+; Make sure we don't try to emit i1 setcc ops
+; FUNC-LABEL: setcc-i1
+; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
+; SI: v_cmp_eq_i32_e64 s[0:1], 0, [[AND]]
+define void @setcc-i1(i32 %in) {
+ %and = and i32 %in, 1
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %endif, label %if
+if:
+ unreachable
+endif:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
index dac74728b3c..4663eb0b6b3 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
@@ -4,8 +4,7 @@
; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
-; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1{{$}}
+; SI: v_cmp_eq_i32_e32 vcc, 0, [[TMP]]{{$}}
; SI: v_cndmask_b32_e64
; SI: buffer_store_byte
define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
OpenPOWER on IntegriCloud