author    Simon Pilgrim <llvm-dev@redking.me.uk>    2015-08-10 20:21:15 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2015-08-10 20:21:15 +0000
commit    a3a72b41de527ac6b4e013bcda1dff70ad191a60 (patch)
tree      70c028c6f1f64b3386c3fad21b63322727b341a8 /llvm/lib
parent    8e7661ec05d4397a242205677225a06bad1182ea (diff)
download  bcm5719-llvm-a3a72b41de527ac6b4e013bcda1dff70ad191a60.tar.gz
          bcm5719-llvm-a3a72b41de527ac6b4e013bcda1dff70ad191a60.zip
[InstCombine] Move SSE2/AVX2 arithmetic vector shift folding to instcombiner
As discussed in D11760, this patch moves the (V)PSRA(WD) arithmetic shift-by-constant folding to InstCombine to match the logical shift implementations.

Differential Revision: http://reviews.llvm.org/D11886

llvm-svn: 244495
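As a rough illustration (a hedged sketch, not part of the commit; the function name is invented), source like the following exercises the folded intrinsics: clang lowers _mm_srai_epi16 with a constant count to the x86.sse2.psrai.w intrinsic, which InstCombine can now turn into a plain IR 'ashr' instead of leaving it for the X86 DAG combine that this patch removes.

    #include <emmintrin.h>

    // (V)PSRAW by immediate 3: with this patch, the constant-count
    // arithmetic shift is folded in InstCombine rather than in
    // X86ISelLowering's PerformINTRINSIC_WO_CHAINCombine.
    __m128i shift_right_arithmetic(__m128i v) {
      return _mm_srai_epi16(v, 3);
    }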
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp               44
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  38
2 files changed, 31 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6dd4fb8d98e..aac4b1452c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23464,50 +23464,6 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-
- // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- EVT VT = Op0.getValueType();
- assert(VT.isVector() && "Expected a vector type!");
-
- if (isa<BuildVectorSDNode>(Op1))
- Op1 = Op1.getOperand(0);
-
- if (!isa<ConstantSDNode>(Op1))
- return SDValue();
-
- EVT SVT = VT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
-
- ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
- const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue());
- uint64_t ShAmt = C.getZExtValue();
-
- // Don't try to convert this shift into a ISD::SRA if the shift
- // count is bigger than or equal to the element size.
- if (ShAmt >= SVTBits)
- return SDValue();
-
- // Trivial case: if the shift count is zero, then fold this
- // into the first operand.
- if (ShAmt == 0)
- return Op0;
-
- // Replace this packed shift intrinsic with a target independent
- // shift dag node.
- SDLoc DL(N);
- SDValue Splat = DAG.getConstant(C, DL, VT);
- return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat);
- }
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index de413c42348..600c8c36392 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -199,7 +199,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
static Value *SimplifyX86immshift(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder,
- bool ShiftLeft) {
+ bool LogicalShift, bool ShiftLeft) {
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
// Simplify if count is constant.
auto Arg1 = II.getArgOperand(1);
auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
@@ -238,9 +240,15 @@ static Value *SimplifyX86immshift(const IntrinsicInst &II,
if (Count == 0)
return Vec;
- // Handle cases when Shift >= BitWidth - just return zero.
- if (Count.uge(BitWidth))
- return ConstantAggregateZero::get(VT);
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
// Get a constant vector of the same type as the first operand.
auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
@@ -249,7 +257,10 @@ static Value *SimplifyX86immshift(const IntrinsicInst &II,
if (ShiftLeft)
return Builder.CreateShl(Vec, ShiftVec);
- return Builder.CreateLShr(Vec, ShiftVec);
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
}
static Value *SimplifyX86extend(const IntrinsicInst &II,
@@ -776,6 +787,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ // Constant fold ashr( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ if (Value *V = SimplifyX86immshift(*II, *Builder, false, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
// Constant fold lshr( <A x Bi>, Ci ).
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
@@ -789,7 +813,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
- if (Value *V = SimplifyX86immshift(*II, *Builder, false))
+ if (Value *V = SimplifyX86immshift(*II, *Builder, true, false))
return ReplaceInstUsesWith(*II, V);
break;
@@ -806,7 +830,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
- if (Value *V = SimplifyX86immshift(*II, *Builder, true))
+ if (Value *V = SimplifyX86immshift(*II, *Builder, true, true))
return ReplaceInstUsesWith(*II, V);
break;
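For reference, a minimal scalar model of the count handling that SimplifyX86immshift now applies to all three shift kinds (an illustrative sketch only; model_lane is an invented name, not LLVM code). Note one behavioral difference from the removed DAG combine, which simply bailed out for out-of-range counts: arithmetic shifts are now clamped to BitWidth - 1, since shifting in further sign bits past that point changes nothing.

    #include <cstdint>

    // Models one 16-bit lane of PSRLW/PSLLW/PSRAW with a constant count.
    uint16_t model_lane(uint16_t x, uint64_t count, bool logicalShift,
                        bool shiftLeft) {
      const unsigned BitWidth = 16;
      if (count == 0)
        return x;                 // trivial case: return the input unchanged
      if (count >= BitWidth) {
        if (logicalShift)
          return 0;               // lshr/shl: out-of-range count gives zero
        count = BitWidth - 1;     // ashr: clamp; sign bits saturate
      }
      if (shiftLeft)              // only valid for logical shifts
        return (uint16_t)(x << count);
      if (logicalShift)
        return x >> count;
      return (uint16_t)((int16_t)x >> count); // arithmetic right shift
    }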