From dcb8d304c319bf8d6c92cde4fcbcff11aa4be238 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 4 Jun 2018 22:26:45 +0000 Subject: [InstCombine] refine UB-handling in shuffle-binop transform As noted in rL333782, we can be both better for optimization and safer with this transform: BinOp (shuffle V1, Mask), C --> shuffle (BinOp V1, NewC), Mask The only potentially unsafe-to-speculate binops are integer div/rem. All other binops are always safe (although I don't see a way to assert that in code here). For opcodes like shifts that can produce poison, it can't matter here because we know the lanes with undef are dropped by the subsequent shuffle. Differential Revision: https://reviews.llvm.org/D47686 llvm-svn: 333962 --- .../InstCombine/InstructionCombining.cpp | 28 ++++++++-------- llvm/test/Transforms/InstCombine/vec_shuffle.ll | 38 +++++++++++----------- 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'llvm') diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index f87b94dd355..2b9db4baafa 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1417,21 +1417,21 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) { } } if (MayChange) { - // It's not safe to use a vector with undef elements because the entire - // instruction can be folded to undef (for example, div/rem divisors). - // Replace undef lanes with the first non-undef element. Vector demanded - // elements can change those back to undef values if that is safe. - Constant *SafeDummyConstant = nullptr; - for (unsigned i = 0; i < VWidth; ++i) { - if (!isa<UndefValue>(NewVecC[i])) { - SafeDummyConstant = NewVecC[i]; - break; - } + // With integer div/rem instructions, it is not safe to use a vector with + // undef elements because the entire instruction can be folded to undef. 
+ // So replace undef elements with '1' because that can never induce + // undefined behavior. All other binop opcodes are always safe to + // speculate, and therefore, it is fine to include undef elements for + // unused lanes (and using undefs may help optimization). + BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); + if (Opcode == Instruction::UDiv || Opcode == Instruction::URem || + Opcode == Instruction::SDiv || Opcode == Instruction::SRem) { + assert(C->getType()->getScalarType()->isIntegerTy() && + "Not expecting FP opcodes/operands/constants here"); + for (unsigned i = 0; i < VWidth; ++i) + if (isa<UndefValue>(NewVecC[i])) + NewVecC[i] = ConstantInt::get(NewVecC[i]->getType(), 1); } - assert(SafeDummyConstant && "Undef constant vector was not simplified?"); - for (unsigned i = 0; i < VWidth; ++i) - if (isa<UndefValue>(NewVecC[i])) - NewVecC[i] = SafeDummyConstant; // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 59345b8a512..7e863fd2813 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -452,7 +452,7 @@ define <4 x i32> @mul_const_splat(<4 x i32> %v) { define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) { ; CHECK-LABEL: @lshr_const_half_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> , [[V:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> , [[V:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -643,7 +643,7 @@ define <2 x i32> @mul_splat_constant(<2 x i32> %x) { define <2 x i32> @shl_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @shl_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 
x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -654,7 +654,7 @@ define <2 x i32> @shl_splat_constant0(<2 x i32> %x) { define <2 x i32> @shl_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @shl_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -665,7 +665,7 @@ define <2 x i32> @shl_splat_constant1(<2 x i32> %x) { define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @ashr_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -676,7 +676,7 @@ define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) { define <2 x i32> @ashr_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @ashr_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -687,7 +687,7 @@ define <2 x i32> @ashr_splat_constant1(<2 x i32> %x) { define <2 x i32> @lshr_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @lshr_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -698,7 +698,7 @@ define <2 x i32> @lshr_splat_constant0(<2 x i32> %x) { define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @lshr_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x 
i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -720,7 +720,7 @@ define <2 x i32> @urem_splat_constant0(<2 x i32> %x) { define <2 x i32> @urem_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @urem_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -742,7 +742,7 @@ define <2 x i32> @srem_splat_constant0(<2 x i32> %x) { define <2 x i32> @srem_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @srem_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -764,7 +764,7 @@ define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) { define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @udiv_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -786,7 +786,7 @@ define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) { define <2 x i32> @sdiv_splat_constant1(<2 x i32> %x) { ; CHECK-LABEL: @sdiv_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -830,7 +830,7 @@ define <2 x i32> @xor_splat_constant(<2 x i32> %x) { define <2 x float> @fadd_splat_constant(<2 x float> %x) { ; CHECK-LABEL: @fadd_splat_constant( 
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -841,7 +841,7 @@ define <2 x float> @fadd_splat_constant(<2 x float> %x) { define <2 x float> @fsub_splat_constant0(<2 x float> %x) { ; CHECK-LABEL: @fsub_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -852,7 +852,7 @@ define <2 x float> @fsub_splat_constant0(<2 x float> %x) { define <2 x float> @fsub_splat_constant1(<2 x float> %x) { ; CHECK-LABEL: @fsub_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -863,7 +863,7 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) { define <2 x float> @fmul_splat_constant(<2 x float> %x) { ; CHECK-LABEL: @fmul_splat_constant( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -874,7 +874,7 @@ define <2 x float> @fmul_splat_constant(<2 x float> %x) { define <2 x float> @fdiv_splat_constant0(<2 x float> %x) { ; CHECK-LABEL: @fdiv_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x 
float> [[R]] ; @@ -885,7 +885,7 @@ define <2 x float> @fdiv_splat_constant0(<2 x float> %x) { define <2 x float> @fdiv_splat_constant1(<2 x float> %x) { ; CHECK-LABEL: @fdiv_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -896,7 +896,7 @@ define <2 x float> @fdiv_splat_constant1(<2 x float> %x) { define <2 x float> @frem_splat_constant0(<2 x float> %x) { ; CHECK-LABEL: @frem_splat_constant0( -; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> , [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -907,7 +907,7 @@ define <2 x float> @frem_splat_constant0(<2 x float> %x) { define <2 x float> @frem_splat_constant1(<2 x float> %x) { ; CHECK-LABEL: @frem_splat_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; -- cgit v1.2.3