summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp26
1 files changed, 16 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47382149f37..8fd39156865 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31029,8 +31029,8 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
- const SDValue &Zext1, const SDLoc &DL) {
-
+ const SDValue &Zext1, const SDLoc &DL,
+ const X86Subtarget &Subtarget) {
// Find the appropriate width for the PSADBW.
EVT InVT = Zext0.getOperand(0).getValueType();
unsigned RegSize = std::max(128u, InVT.getSizeInBits());
@@ -31045,9 +31045,15 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
Ops[0] = Zext1.getOperand(0);
SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
- // Actually build the SAD
+ // Actually build the SAD, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
+ auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
+ SDValue Op1) {
+ MVT VT = MVT::getVectorVT(MVT::i64, Op0.getValueSizeInBits() / 64);
+ return DAG.getNode(X86ISD::PSADBW, DL, VT, Op0, Op1);
+ };
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
- return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
+ return SplitBinaryOpsAndApply(DAG, Subtarget, DL, SadVT, SadOp0, SadOp1,
+ PSADBWBuilder);
}
// Attempt to replace an min/max v8i16/v16i8 horizontal reduction with
@@ -31216,10 +31222,10 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
unsigned RegSize = 128;
if (Subtarget.useBWIRegs())
RegSize = 512;
- else if (Subtarget.hasAVX2())
+ else if (Subtarget.hasAVX())
RegSize = 256;
- // We handle upto v16i* for SSE2 / v32i* for AVX2 / v64i* for AVX512.
+ // We handle upto v16i* for SSE2 / v32i* for AVX / v64i* for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (RegSize / VT.getVectorNumElements() < 8)
@@ -31254,7 +31260,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
// Create the SAD instruction.
SDLoc DL(Extract);
- SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
+ SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget);
// If the original vector was wider than 8 elements, sum over the results
// in the SAD vector.
@@ -37404,10 +37410,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
unsigned RegSize = 128;
if (Subtarget.useBWIRegs())
RegSize = 512;
- else if (Subtarget.hasAVX2())
+ else if (Subtarget.hasAVX())
RegSize = 256;
- // We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
+ // We only handle v16i32 for SSE2 / v32i32 for AVX / v64i32 for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (VT.getSizeInBits() / 4 > RegSize)
@@ -37433,7 +37439,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
// reduction. Note that the number of elements of the result of SAD is less
// than the number of elements of its input. Therefore, we could only update
// part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL);
+ SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
// The output of PSADBW is a vector of i64.
// We need to turn the vector of i64 into a vector of i32.
OpenPOWER on IntegriCloud