diff options
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/shuffle_select.ll | 42 |
2 files changed, 25 insertions, 36 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f317252a00d..8fecd6904c3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1197,17 +1197,12 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { else return nullptr; - auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); - Value *X = Op0IsBinop ? Op1 : Op0; - // TODO: Allow div/rem by accounting for potential UB due to undef elements. - if (BO->isIntDivRem()) - return nullptr; - // The identity constant for a binop leaves a variable operand unchanged. For // a vector, this is a splat of something like 0, -1, or 1. // If there's no identity constant for this binop, we're done. + auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); - Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType()); + Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); if (!IdC) return nullptr; @@ -1219,14 +1214,22 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : ConstantExpr::getShuffleVector(IdC, C, Mask); + bool MightCreatePoisonOrUB = + Mask->containsUndefElement() && + (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); + if (MightCreatePoisonOrUB) + NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true); + // shuf (bop X, C), X, M --> bop X, C' // shuf X, (bop X, C), M --> bop X, C' + Value *X = Op0IsBinop ? Op1 : Op0; Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); NewBO->copyIRFlags(BO); // An undef shuffle mask element may propagate as an undef constant element in // the new binop. That would produce poison where the original code might not. - if (Mask->containsUndefElement()) + // If we already made a safe constant, then there's no danger. + if (Mask->containsUndefElement() && !MightCreatePoisonOrUB) NewBO->dropPoisonGeneratingFlags(); return NewBO; } diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 0ccf8d13f09..57f6aee02d4 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -77,8 +77,7 @@ define <4 x i32> @mul(<4 x i32> %v) { define <4 x i32> @shl(<4 x i32> %v) { ; CHECK-LABEL: @shl( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 4, i32 1, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -88,8 +87,7 @@ define <4 x i32> @shl(<4 x i32> %v) { define <4 x i32> @shl_nsw(<4 x i32> %v) { ; CHECK-LABEL: @shl_nsw( -; CHECK-NEXT: [[B:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 4, i32 1, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -99,8 +97,7 @@ define <4 x i32> @shl_nsw(<4 x i32> %v) { define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @shl_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -110,8 +107,7 @@ define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @shl_nuw_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef> +; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -121,8 +117,7 @@ define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @lshr_constant_op0(<4 x i32> %v) { ; CHECK-LABEL: @lshr_constant_op0( -; CHECK-NEXT: [[B:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 5, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -132,8 +127,7 @@ define <4 x i32> @lshr_constant_op0(<4 x i32> %v) { define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) { ; CHECK-LABEL: @lshr_exact_constant_op0( -; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 5, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -143,8 +137,7 @@ define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) { define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @lshr_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -154,8 +147,7 @@ define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @lshr_exact_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef> +; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -178,8 +170,7 @@ define <4 x i32> @lshr_constant_op1(<4 x i32> %v) { define <3 x i32> @ashr(<3 x i32> %v) { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[B:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 11, i32 12, i32 13> -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[V]], <3 x i32> <i32 3, i32 1, i32 2> +; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13> ; CHECK-NEXT: ret <3 x i32> [[S]] ; %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13> @@ -270,8 +261,7 @@ define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @sdiv(<4 x i32> %v) { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 1, i32 6, i32 3> +; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -281,8 +271,7 @@ define <4 x i32> @sdiv(<4 x i32> %v) { define <4 x i32> @sdiv_exact(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_exact( -; CHECK-NEXT: [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 1, i32 6, i32 3> +; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -294,8 +283,7 @@ define <4 x i32> @sdiv_exact(<4 x i32> %v) { define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 undef> +; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -305,8 +293,7 @@ define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_exact_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 undef> +; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14> @@ -395,8 +382,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) { define <4 x double> @fdiv_constant_op1(<4 x double> %v) { ; CHECK-LABEL: @fdiv_constant_op1( -; CHECK-NEXT: [[B:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double 4.100000e+01, double 4.200000e+01, double 4.300000e+01, double 4.400000e+01> -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7> +; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01> ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0> |

