summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-03-20 21:30:20 +0000
committerCraig Topper <craig.topper@intel.com>2019-03-20 21:30:20 +0000
commit03675533043ddbf2ad27f260ccc410952977f574 (patch)
treedd1b6e7e2190944a9553bef01e6a73d70e8e1a4e /llvm/lib
parentbbcb95a64e186dacd66034340de81b5efc8c000e (diff)
downloadbcm5719-llvm-03675533043ddbf2ad27f260ccc410952977f574.tar.gz
bcm5719-llvm-03675533043ddbf2ad27f260ccc410952977f574.zip
[X86] Call lowerShuffleAsBitMask for 512-bit vectors in lowerShuffleAsBlend.
This patch enables the use of lowerShuffleAsBitMask for 512-bit blends before falling back to move immediate, GPR to k-register, and masked op. I had to make some changes to support v8i64 when i64 is not a legal type, and to support floating point types. This trades a load for the move immediate and GPR move which is higher latency. But it's probably better for register pressure not having to hop through other register classes. The load+and should play better with LICM and rematerialization I think. Differential Revision: https://reviews.llvm.org/D59479 llvm-svn: 356618
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp54
1 files changed, 43 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1185b8bdb7e..d871f0ce3b7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10364,11 +10364,30 @@ static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
/// one of the inputs being zeroable.
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG) {
- assert(!VT.isFloatingPoint() && "Floating point types are not supported");
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT MaskVT = VT;
MVT EltVT = VT.getVectorElementType();
- SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+ SDValue Zero, AllOnes;
+ // Use f64 if i64 isn't legal.
+ if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+ EltVT = MVT::f64;
+ MaskVT = MVT::getVectorVT(EltVT, Mask.size());
+ }
+
+ MVT LogicVT = VT;
+ if (EltVT == MVT::f32 || EltVT == MVT::f64) {
+ Zero = DAG.getConstantFP(0.0, DL, MVT::f64);
+ AllOnes = DAG.getConstantFP(APInt::getAllOnesValue(64).bitsToDouble(), DL,
+ EltVT);
+ LogicVT = MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32,
+ Mask.size());
+ } else {
+ Zero = DAG.getConstant(0, DL, EltVT);
+ AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+ }
+
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -10386,8 +10405,11 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
if (!V)
return SDValue(); // No non-zeroable elements!
- SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
- return DAG.getNode(ISD::AND, DL, VT, V, VMask);
+ SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
+ VMask = DAG.getBitcast(LogicVT, VMask);
+ V = DAG.getBitcast(LogicVT, V);
+ SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
+ return DAG.getBitcast(VT, And);
}
/// Try to emit a blend instruction for a shuffle using bit math.
@@ -10552,7 +10574,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
- DAG))
+ Subtarget, DAG))
return Masked;
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
@@ -10610,6 +10632,16 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8: {
+ // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ if (!OptForSize) {
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return Masked;
+ }
+
+ // Otherwise load an immediate into a GPR, cast to k-register, and use a
+ // masked move.
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
@@ -12766,7 +12798,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
- Zeroable, DAG))
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -13467,7 +13499,7 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG))
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -13735,7 +13767,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, DAG))
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -15571,7 +15603,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
// No floating point type available, if we can't use the bit operations
// for masking/blending then decompose into 128-bit vectors.
if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
- DAG))
+ Subtarget, DAG))
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
OpenPOWER on IntegriCloud