summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2018-06-01 19:23:18 +0000
committer Sanjay Patel <spatel@rotateright.com> 2018-06-01 19:23:18 +0000
commit 66f7e19f6a05ca11f62c9e48f8d6ed0c011a47cf (patch)
tree 772820a4df6d329d4fd051f98c7d1ba5f9f9fa39 /llvm
parent 36d457c20d86a749e08ecee23eeb2bade5f2ab56 (diff)
download bcm5719-llvm-66f7e19f6a05ca11f62c9e48f8d6ed0c011a47cf.tar.gz
bcm5719-llvm-66f7e19f6a05ca11f62c9e48f8d6ed0c011a47cf.zip
[InstCombine] fix vector shuffle transform to replace undef elements (PR37648)
This bug (https://bugs.llvm.org/show_bug.cgi?id=37648) was introduced by the enhancement to this transform in rL332479. The urem test shows the disaster potential: any undef divisor lane makes the whole op undef. The test diffs show that vector demanded elements already turns some, but not all, of the potentially unused binop operands back into undef.
llvm-svn: 333782
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 16
-rw-r--r-- llvm/test/Transforms/InstCombine/vec_shuffle.ll | 8
2 files changed, 20 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7ccb65d6531..7a34419bfe9 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1421,6 +1421,22 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
}
}
if (MayChange) {
+ // It's not safe to use a vector with undef elements because the entire
+ // instruction can be folded to undef (for example, div/rem divisors).
+ // Replace undef lanes with the first non-undef element. Vector demanded
+ // elements can change those back to undef values if that is safe.
+ Constant *SafeDummyConstant = nullptr;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (!isa<UndefValue>(NewVecC[i])) {
+ SafeDummyConstant = NewVecC[i];
+ break;
+ }
+ }
+ assert(SafeDummyConstant && "Undef constant vector was not simplified?");
+ for (unsigned i = 0; i < VWidth; ++i)
+ if (isa<UndefValue>(NewVecC[i]))
+ NewVecC[i] = SafeDummyConstant;
+
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
Constant *NewC = ConstantVector::get(NewVecC);
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 5447063bbf6..1b692496384 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -452,7 +452,7 @@ define <4 x i32> @mul_const_splat(<4 x i32> %v) {
define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
; CHECK-LABEL: @lshr_const_half_splat(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 8>, [[V:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
@@ -583,11 +583,11 @@ define <2 x i32*> @pr23113(<4 x i32*> %A) {
ret <2 x i32*> %1
}
-; FIXME: Unused lanes in the new binop should not kill the entire op.
+; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
define <2 x i32> @PR37648(<2 x i32> %x) {
; CHECK-LABEL: @PR37648(
-; CHECK-NEXT: ret <2 x i32> undef
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
;
%splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
%r = urem <2 x i32> %splat, <i32 1, i32 1>
@@ -596,7 +596,7 @@ define <2 x i32> @PR37648(<2 x i32> %x) {
define <2 x float> @splat_first_fp(<2 x float> %x) {
; CHECK-LABEL: @splat_first_fp(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 1.000000e+00, float undef>
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
OpenPOWER on IntegriCloud