summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2020-01-03 14:57:35 -0500
committer Sanjay Patel <spatel@rotateright.com> 2020-01-03 14:58:49 -0500
commit ca7fdd41bda02a24c401ecf75d306cea93c27fb5 (patch)
tree 68a7a5a9fddc2e9739d54256d34a618ccc0ef099
parent 7cdc60c3db1ed96f3d976ad913709c6c83776f3b (diff)
download bcm5719-llvm-ca7fdd41bda02a24c401ecf75d306cea93c27fb5.tar.gz
bcm5719-llvm-ca7fdd41bda02a24c401ecf75d306cea93c27fb5.zip
[DAGCombiner] fix miscompile in translating (X & undef) to shuffle
See PR42982 for more context: https://bugs.llvm.org/show_bug.cgi?id=42982
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 4
-rw-r--r-- llvm/test/CodeGen/X86/combine-and.ll 4
2 files changed, 5 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3bfea38932f..d401e7fb657 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19769,8 +19769,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
+ // X & undef --> 0 (not undef). So this lane must be converted to choose
+ // from the zero constant vector (same as if the element had all 0-bits).
if (Elt.isUndef()) {
- Indices.push_back(-1);
+ Indices.push_back(i + NumSubElts);
continue;
}
diff --git a/llvm/test/CodeGen/X86/combine-and.ll b/llvm/test/CodeGen/X86/combine-and.ll
index c3a39487375..462374c4862 100644
--- a/llvm/test/CodeGen/X86/combine-and.ll
+++ b/llvm/test/CodeGen/X86/combine-and.ll
@@ -163,13 +163,13 @@ define <4 x i32> @test14(<4 x i32> %A) {
ret <4 x i32> %1
}
-; FIXME: X & undef must fold to 0. So lane 0 must choose from the zero vector.
+; X & undef must fold to 0. So lane 0 must choose from the zero vector.
define <4 x i32> @undef_lane(<4 x i32> %x) {
; CHECK-LABEL: undef_lane:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%r = and <4 x i32> %x, <i32 undef, i32 4294967295, i32 0, i32 4294967295>
ret <4 x i32> %r
OpenPOWER on IntegriCloud