summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp44
1 files changed, 44 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 77fa9ff26f2..3f25cb15441 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3844,6 +3844,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
default: return false;
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
+ case X86ISD::VPERMIL2:
case X86ISD::VPPERM:
return true;
}
@@ -25288,6 +25289,49 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
+ // With XOP, binary shuffles of 128/256-bit floating point vectors can combine
+ // to VPERMIL2PD/VPERMIL2PS.
+ if ((Depth >= 3 || HasVariableMask) && Subtarget.hasXOP() &&
+ (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
+ MaskVT == MVT::v8f32)) {
+ // VPERMIL2 Operation.
+ // Bits[3] - Match Bit.
+ // Bits[2:1] - (Per Lane) PD Shuffle Mask.
+ // Bits[2:0] - (Per Lane) PS Shuffle Mask.
+ unsigned NumLanes = MaskVT.getSizeInBits() / 128;
+ unsigned NumEltsPerLane = NumMaskElts / NumLanes;
+ SmallVector<SDValue, 8> VPerm2Idx;
+ MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits());
+ MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts);
+ unsigned M2ZImm = 0;
+ for (int M : Mask) {
+ if (M == SM_SentinelUndef) {
+ VPerm2Idx.push_back(DAG.getUNDEF(MaskIdxSVT));
+ continue;
+ }
+ if (M == SM_SentinelZero) {
+ M2ZImm = 2;
+ VPerm2Idx.push_back(DAG.getConstant(8, DL, MaskIdxSVT));
+ continue;
+ }
+ int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
+ Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
+ VPerm2Idx.push_back(DAG.getConstant(Index, DL, MaskIdxSVT));
+ }
+ V1 = DAG.getBitcast(MaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(MaskVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ SDValue VPerm2MaskOp = DAG.getBuildVector(MaskIdxVT, DL, VPerm2Idx);
+ DCI.AddToWorklist(VPerm2MaskOp.getNode());
+ Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
+ DAG.getConstant(M2ZImm, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replacing 5
OpenPOWER on IntegriCloud