summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2018-11-18 17:36:23 +0000
committerSanjay Patel <spatel@rotateright.com>2018-11-18 17:36:23 +0000
commit8c0cd77bffb55151dcfcf58caf0f6a60b7556bcc (patch)
tree279187f1dfbf9e7914f9b094465c64cbab0d6b5b
parentec808cf541e351d2f7129e5faa154fe84afc8087 (diff)
downloadbcm5719-llvm-8c0cd77bffb55151dcfcf58caf0f6a60b7556bcc.tar.gz
bcm5719-llvm-8c0cd77bffb55151dcfcf58caf0f6a60b7556bcc.zip
[DAG] add undef simplifications for select nodes
Sadly, this duplicates (twice) the logic from InstSimplify. There might be some way to at least share the DAG versions of the code, but copying the folds seems to be the standard method to ensure that we don't miss these folds.

Unlike in IR, we don't run DAGCombiner to fixpoint, so there's no way to ensure that we do these kinds of simplifications unless the code is repeated at node creation time and during combines.

There were other tests that would become worthless with this improvement that I changed as pre-commits:
rL347161
rL347164
rL347165
rL347166
rL347167

I'm not sure how to salvage the remaining tests (diffs in this patch). So the x86 tests verify that the new code is working as intended. The AMDGPU test is actually similar to my motivating case: we have some undef value that has survived to machine IR in an x86 test, and then it gets folded in some weird way, or we crash if we don't transfer the undef flag. But we would have been better off never getting to that point by doing these simplifications.

This will lead back to PR32023 someday...
https://bugs.llvm.org/show_bug.cgi?id=32023

llvm-svn: 347170
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp27
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp16
-rw-r--r--llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll3
-rw-r--r--llvm/test/CodeGen/X86/avx512-select.ll4
-rw-r--r--llvm/test/CodeGen/X86/zext-extract_subreg.ll4
5 files changed, 34 insertions, 20 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 24f019d55c9..0f428b72d52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7236,21 +7236,24 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT0 = N0.getValueType();
SDLoc DL(N);
- // fold (select C, X, X) -> X
- if (N1 == N2)
- return N1;
-
- // fold (select, C, X, undef) -> X
- if (N2.isUndef())
- return N1;
+ // select undef, N1, N2 --> N1 (if it's a constant), otherwise N2
+ if (N0.isUndef())
+ return isa<ConstantSDNode>(N1) ? N1 : N2;
+ // select, ?, undef, N2 --> N2
if (N1.isUndef())
return N2;
+ // select, ?, N1, undef --> N1
+ if (N2.isUndef())
+ return N1;
- if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
- // fold (select true, X, Y) -> X
- // fold (select false, X, Y) -> Y
- return !N0C->isNullValue() ? N1 : N2;
- }
+ // fold (select true, X, Y) -> X
+ // fold (select false, X, Y) -> Y
+ if (auto *N0C = dyn_cast<const ConstantSDNode>(N0))
+ return N0C->isNullValue() ? N2 : N1;
+
+ // select ?, N1, N1 --> N1
+ if (N1 == N2)
+ return N1;
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 70958c557f9..45c435a317d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5078,12 +5078,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SELECT:
+ // select undef, N2, N3 --> N2 (if it's a constant), otherwise N3
+ if (N1.isUndef())
+ return isa<ConstantSDNode>(N2) ? N2 : N3;
+ // select, ?, undef, N3 --> N3
+ if (N2.isUndef())
+ return N3;
+ // select, ?, N2, undef --> N2
+ if (N3.isUndef())
+ return N2;
+
// select true, N2, N3 --> N2
// select false, N2, N3 --> N3
if (auto *N1C = dyn_cast<ConstantSDNode>(N1))
- return N1C->getZExtValue() ? N2 : N3;
+ return N1C->isNullValue() ? N3 : N2;
- if (N2 == N3) return N2; // select ?, N2, N2 --> N2
+ // select ?, N2, N2 --> N2
+ if (N2 == N3)
+ return N2;
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
index 789decb45f2..faa468c974c 100644
--- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -32,7 +32,6 @@ bb2:
; GCN-LABEL: {{^}}preserve_condition_undef_flag:
; GCN-NOT: vcc
-; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
@@ -40,7 +39,7 @@ bb0:
%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
%tmp6 = fcmp olt float %arg2, 1.000000e+00
- %tmp7 = fcmp olt float %arg, %tmp4
+ %tmp7 = fcmp olt float %arg, undef
%tmp8 = and i1 %tmp5, %tmp6
%tmp9 = and i1 %tmp8, %tmp7
br i1 %tmp9, label %bb1, label %bb2
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index e714a728230..4a1aca4f800 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -265,13 +265,13 @@ define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
define i64 @pr30249() {
; X86-LABEL: pr30249:
; X86: # %bb.0:
-; X86-NEXT: movl $2, %eax
+; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: pr30249:
; X64: # %bb.0:
-; X64-NEXT: movl $2, %eax
+; X64-NEXT: movl $1, %eax
; X64-NEXT: retq
%v = select i1 undef , i64 1, i64 2
ret i64 %v
diff --git a/llvm/test/CodeGen/X86/zext-extract_subreg.ll b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
index 86fce0c235b..0b924d6ea95 100644
--- a/llvm/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
@@ -14,8 +14,8 @@ define void @t() nounwind ssp {
; CHECK-NEXT: LBB0_6: ## %return
; CHECK-NEXT: retq
; CHECK-NEXT: LBB0_2: ## %if.end
-; CHECK-NEXT: movl (%rax), %eax
-; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_5
OpenPOWER on IntegriCloud