diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 10:50:09 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 10:50:09 +0000 |
commit | cc7495a3556c142bd7250ff184515319815d77ef (patch) | |
tree | d6d5c8657e239f9ea337026d9c7f4f9987ea0c4e | |
parent | 8365e420108f59d370d1937dba9c686d3186ef66 (diff) | |
download | bcm5719-llvm-cc7495a3556c142bd7250ff184515319815d77ef.tar.gz bcm5719-llvm-cc7495a3556c142bd7250ff184515319815d77ef.zip |
[X86][CodeGen][NFC] Delay `combineIncDecVector()` from DAGCombine to X86DAGToDAGISel
Summary:
We were previously doing it in DAGCombine.
But we also want to do `sub %x, C` -> `add %x, (sub 0, C)` for vectors in DAGCombine.
So if we had `sub %x, -1`, we'll transform it to `add %x, 1`,
which `combineIncDecVector()` will immediately transform back into `sub %x, -1`,
and here we go again...
I've marked this as NFC since not a single test changes,
but since this 'changes' DAGCombine behavior, it probably isn't fully NFC.
Reviewers: RKSimon, craig.topper, spatel
Reviewed By: craig.topper
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62327
llvm-svn: 370327
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 48 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 49 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-int-avx1.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-int-sse42.ll | 2 |
5 files changed, 60 insertions, 44 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e0a462471a7..eaedf14a9d8 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -507,6 +507,7 @@ namespace { bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; bool tryShiftAmountMod(SDNode *N); + bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); @@ -3775,6 +3776,49 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { return true; } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. +bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) { + assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue OneVec = Node->getOperand(1); + + APInt SplatVal; + if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue()) + return false; + + SDLoc DL(Node); + SDValue AllOnesVec; + + APInt Ones = APInt::getAllOnesValue(32); + assert(VT.getSizeInBits() % 32 == 0 && + "Expected bit count to be a multiple of 32"); + unsigned NumElts = VT.getSizeInBits() / 32; + assert(NumElts > 0 && "Expected to get non-empty vector."); + AllOnesVec = + CurDAG->getConstant(Ones, DL, MVT::getVectorVT(MVT::i32, NumElts)); + insertDAGNode(*CurDAG, X, AllOnesVec); + + AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec); + insertDAGNode(*CurDAG, X, AllOnesVec); + + unsigned 
NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; + SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// If the high bits of an 'and' operand are known zero, try setting the /// high bits of an 'and' constant operand to produce a smaller encoding by /// creating a small, sign-extended negative immediate rather than a large @@ -4347,6 +4391,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() && + combineIncDecVector(Node)) + return; + // Try to avoid folding immediates with multiple uses for optsize. // This code tries to select to register form directly to avoid going // through the isel table which might fold the immediate. We can't change diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6453d3ad0bc..3f37cb5ad8c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6215,7 +6215,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; } -static bool isConstantSplat(SDValue Op, APInt &SplatVal) { +namespace llvm { +namespace X86 { +bool isConstantSplat(SDValue Op, APInt &SplatVal) { APInt UndefElts; SmallVector<APInt, 16> EltBits; if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), @@ -6238,6 +6240,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) { return false; } +} // namespace X86 +} // namespace llvm static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, @@ -18115,7 +18119,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, std::swap(Op0, Op1); APInt APIntShiftAmt; - if (isConstantSplat(Amt, APIntShiftAmt)) { + if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = 
APIntShiftAmt.urem(VT.getScalarSizeInBits()); return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); @@ -25337,7 +25341,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Optimize shl/srl/sra with constant shift amount. APInt APIntShiftAmt; - if (!isConstantSplat(Amt, APIntShiftAmt)) + if (!X86::isConstantSplat(Amt, APIntShiftAmt)) return SDValue(); // If the shift amount is out of range, return undef. @@ -43750,39 +43754,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags); } -/// Convert vector increment or decrement to sub/add with an all-ones constant: -/// add X, <1, 1...> --> sub X, <-1, -1...> -/// sub X, <1, 1...> --> add X, <-1, -1...> -/// The all-ones vector constant can be materialized using a pcmpeq instruction -/// that is commonly recognized as an idiom (has no register dependency), so -/// that's better/smaller than loading a splat 1 constant. -static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { - assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && - "Unexpected opcode for increment/decrement transform"); - - // Delay this until legalize ops to avoid interfering with early DAG combines - // that may expect canonical adds. - // FIXME: We may want to consider moving this to custom lowering or all the - // way to isel, but lets start here. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Pseudo-legality check: getOnesVector() expects one of these types, so bail - // out and wait for legalization if we have an unsupported vector length. 
- EVT VT = N->getValueType(0); - if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) - return SDValue(); - - APInt SplatVal; - if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue()) - return SDValue(); - - SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); - unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; - return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); -} - static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { @@ -44045,9 +44016,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, HADDBuilder); } - if (SDValue V = combineIncDecVector(N, DAG, DCI)) - return V; - return combineAddOrSubToADCOrSBB(N, DAG); } @@ -44176,9 +44144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, HSUBBuilder); } - if (SDValue V = combineIncDecVector(N, DAG, DCI)) - return V; - // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) return V; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 53bdc84cee1..05e4f16fc49 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -683,6 +683,9 @@ namespace llvm { bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO); + /// If Op is a constant whose elements are all the same constant or + /// undefined, return true and return the constant value in \p SplatVal. 
+ bool isConstantSplat(SDValue Op, APInt &SplatVal); } // end namespace X86 //===--------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx1.ll index b92a70b8522..46fd1ab5dfb 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-avx1.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-avx1.ll @@ -540,8 +540,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() diff --git a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll index 873be133b3c..3efaeddfbd6 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll @@ -724,8 +724,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: psubb %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() |