diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-03-20 21:30:20 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-03-20 21:30:20 +0000 |
| commit | 03675533043ddbf2ad27f260ccc410952977f574 (patch) | |
| tree | dd1b6e7e2190944a9553bef01e6a73d70e8e1a4e /llvm/lib | |
| parent | bbcb95a64e186dacd66034340de81b5efc8c000e (diff) | |
| download | bcm5719-llvm-03675533043ddbf2ad27f260ccc410952977f574.tar.gz bcm5719-llvm-03675533043ddbf2ad27f260ccc410952977f574.zip | |
[X86] Call lowerShuffleAsBitMask for 512-bit vectors in lowerShuffleAsBlend.
This patch enables the use of lowerShuffleAsBitMask for 512-bit blends before
falling back to move immediate, GPR to k-register, and masked op.
I had to make some changes to support v8i64 when i64 is not a legal type, and to
support floating point types.
This trades a load for the move immediate and GPR move which is higher latency.
But it's probably better for register pressure not having to hop through other
register classes. The load+and should play better with LICM and
rematerialization I think.
Differential Revision: https://reviews.llvm.org/D59479
llvm-svn: 356618
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 54 |
1 files changed, 43 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1185b8bdb7e..d871f0ce3b7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10364,11 +10364,30 @@ static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, /// one of the inputs being zeroable. static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, - const APInt &Zeroable, SelectionDAG &DAG) { - assert(!VT.isFloatingPoint() && "Floating point types are not supported"); + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + MVT MaskVT = VT; MVT EltVT = VT.getVectorElementType(); - SDValue Zero = DAG.getConstant(0, DL, EltVT); - SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); + SDValue Zero, AllOnes; + // Use f64 if i64 isn't legal. + if (EltVT == MVT::i64 && !Subtarget.is64Bit()) { + EltVT = MVT::f64; + MaskVT = MVT::getVectorVT(EltVT, Mask.size()); + } + + MVT LogicVT = VT; + if (EltVT == MVT::f32 || EltVT == MVT::f64) { + Zero = DAG.getConstantFP(0.0, DL, MVT::f64); + AllOnes = DAG.getConstantFP(APInt::getAllOnesValue(64).bitsToDouble(), DL, + EltVT); + LogicVT = MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, + Mask.size()); + } else { + Zero = DAG.getConstant(0, DL, EltVT); + AllOnes = DAG.getAllOnesConstant(DL, EltVT); + } + SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero); SDValue V; for (int i = 0, Size = Mask.size(); i < Size; ++i) { @@ -10386,8 +10405,11 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, if (!V) return SDValue(); // No non-zeroable elements! 
- SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps); - return DAG.getNode(ISD::AND, DL, VT, V, VMask); + SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps); + VMask = DAG.getBitcast(LogicVT, VMask); + V = DAG.getBitcast(LogicVT, V); + SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask); + return DAG.getBitcast(VT, And); } /// Try to emit a blend instruction for a shuffle using bit math. @@ -10552,7 +10574,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, - DAG)) + Subtarget, DAG)) return Masked; if (Subtarget.hasBWI() && Subtarget.hasVLX()) { @@ -10610,6 +10632,16 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v16i32: case MVT::v32i16: case MVT::v64i8: { + // Attempt to lower to a bitmask if we can. Only if not optimizing for size. + bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); + if (!OptForSize) { + if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, + Subtarget, DAG)) + return Masked; + } + + // Otherwise load an immediate into a GPR, cast to k-register, and use a + // masked move. MVT IntegerType = MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); @@ -12766,7 +12798,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, return Blend; if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, - Zeroable, DAG)) + Zeroable, Subtarget, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. 
@@ -13467,7 +13499,7 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, return Blend; if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG)) + Zeroable, Subtarget, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. @@ -13735,7 +13767,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, } if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, DAG)) + Zeroable, Subtarget, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. @@ -15571,7 +15603,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, // No floating point type available, if we can't use the bit operations // for masking/blending then decompose into 128-bit vectors. if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, - DAG)) + Subtarget, DAG)) return V; if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) return V; |

