diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-28 10:14:09 +0000 | 
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-28 10:14:09 +0000 | 
| commit | f119e27d80a43e250fc8b09453419cfc4e799359 (patch) | |
| tree | b6aa711b3581e748dcccc04ccc298a3ddf4a68e2 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | 0addd170ab0880941fa4089c2717f3f3a0e4e25a (diff) | |
| download | bcm5719-llvm-f119e27d80a43e250fc8b09453419cfc4e799359.tar.gz bcm5719-llvm-f119e27d80a43e250fc8b09453419cfc4e799359.zip | |
[X86][SSE] Avoid vector extraction/insertion for non-constant uniform shifts
As discussed on D51263, we're better off using byte shifts to clear the upper bits on pre-SSE41 hardware.
llvm-svn: 340810
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 25 | 
1 file changed, 19 insertions, 6 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7755113873a..63dd407a0bb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20451,30 +20451,43 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,    // | i64             | Yes, No    | Use ShAmt as lowest elt               |    // | i32             | Yes        | zero-extend in-reg                    |    // | (i32 zext(i16)) | Yes        | zero-extend in-reg                    | +  // | (i32 zext(i16)) | No         | byte-shift-in-reg                     |    // | i16/i32         | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |    // +=================+============+=======================================+    if (SVT == MVT::i64)      ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); -  else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && +  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND && +           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&             ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {      ShAmt = ShAmt.getOperand(0); -    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt); -    ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); +    MVT AmtTy = MVT::v8i16; +    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt); +    if (Subtarget.hasSSE41()) +      ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); +    else { +      SDValue ByteShift = DAG.getConstant( +          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); +      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); +      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, +                          ByteShift); +      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, +                          ByteShift); + 
   }    } else if (Subtarget.hasSSE41() &&               ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {      ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);      ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);    } else { -    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), -                        DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; +    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), +                        DAG.getUNDEF(SVT)};      ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);    }    // The return type has to be a 128-bit type with the same element    // type as the input type.    MVT EltVT = VT.getVectorElementType(); -  MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); +  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());    ShAmt = DAG.getBitcast(ShVT, ShAmt);    return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); | 

