diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-01 17:13:35 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-01 17:13:35 +0000 |
| commit | 9f04d97cd71a4248e3d064d6d194699e688d1358 (patch) | |
| tree | 496a313f5bfb2e3492817b744721fc8bca109244 /llvm/lib | |
| parent | fa78ad57edfd2fec2882c0ca1dbd7eec34051844 (diff) | |
| download | bcm5719-llvm-9f04d97cd71a4248e3d064d6d194699e688d1358.tar.gz bcm5719-llvm-9f04d97cd71a4248e3d064d6d194699e688d1358.zip | |
[X86][SSE] Fold scalar horizontal add/sub for non-0/1 element extractions
We already perform horizontal add/sub if we extract from elements 0 and 1, this patch extends it to non-0/1 element extraction indices (as long as they are from the lowest 128-bit vector).
Differential Revision: https://reviews.llvm.org/D61263
llvm-svn: 359707
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 |
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7f92e5138d1..aed2c0745af 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19020,33 +19020,38 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
   }
   unsigned LExtIndex = LHS.getConstantOperandVal(1);
   unsigned RExtIndex = RHS.getConstantOperandVal(1);
-  if (LExtIndex == 1 && RExtIndex == 0 &&
+  if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
       (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
     std::swap(LExtIndex, RExtIndex);
 
-  // TODO: This can be extended to handle other adjacent extract pairs.
-  if (LExtIndex != 0 || RExtIndex != 1)
+  if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
     return Op;
 
   SDValue X = LHS.getOperand(0);
   EVT VecVT = X.getValueType();
   unsigned BitWidth = VecVT.getSizeInBits();
+  unsigned NumLanes = BitWidth / 128;
+  unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
   assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
          "Not expecting illegal vector widths here");
 
   // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
-  // equivalent, so extract the 256/512-bit source op to 128-bit.
-  // This is free: ymm/zmm -> xmm.
+  // equivalent, so extract the 256/512-bit source op to 128-bit if we can.
+  // This is free if we're extracting from the bottom lane: ymm/zmm -> xmm.
+  if (NumEltsPerLane <= LExtIndex)
+    return Op;
+
   SDLoc DL(Op);
   if (BitWidth == 256 || BitWidth == 512)
     X = extract128BitVector(X, 0, DAG, DL);
 
   // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
   // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+  // add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1
   // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
   SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
-                     DAG.getIntPtrConstant(0, DL));
+                     DAG.getIntPtrConstant(LExtIndex / 2, DL));
 }
 
 /// Depending on uarch and/or optimizing for size, we might prefer to use a

