summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-08-28 10:14:09 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-08-28 10:14:09 +0000
commitf119e27d80a43e250fc8b09453419cfc4e799359 (patch)
treeb6aa711b3581e748dcccc04ccc298a3ddf4a68e2 /llvm/lib/Target
parent0addd170ab0880941fa4089c2717f3f3a0e4e25a (diff)
downloadbcm5719-llvm-f119e27d80a43e250fc8b09453419cfc4e799359.tar.gz
bcm5719-llvm-f119e27d80a43e250fc8b09453419cfc4e799359.zip
[X86][SSE] Avoid vector extraction/insertion for non-constant uniform shifts
As discussed on D51263, we're better off using byte shifts to clear the upper bits on pre-SSE41 hardware. llvm-svn: 340810
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp25
1 files changed, 19 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7755113873a..63dd407a0bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20451,30 +20451,43 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
// | i64 | Yes, No | Use ShAmt as lowest elt |
// | i32 | Yes | zero-extend in-reg |
// | (i32 zext(i16)) | Yes | zero-extend in-reg |
+ // | (i32 zext(i16)) | No | byte-shift-in-reg |
// | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
// +=================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
ShAmt = ShAmt.getOperand(0);
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ MVT AmtTy = MVT::v8i16;
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
+ if (Subtarget.hasSSE41())
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ else {
+ SDValue ByteShift = DAG.getConstant(
+ (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
+ ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
+ ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ }
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT),
- DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
+ DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
OpenPOWER on IntegriCloud