author     Elena Demikhovsky <elena.demikhovsky@intel.com>   2017-06-22 06:47:41 +0000
committer  Elena Demikhovsky <elena.demikhovsky@intel.com>   2017-06-22 06:47:41 +0000
commit     2dac0b4d583f685691635448de73fdd0f6191582 (patch)
tree       72323032aec0b2f4067bc3cf40263e6e40d489d7 /llvm/lib
parent     3c4933fcc6014e1f07d32ac35cd3c8513bc302f1 (diff)
AVX-512: Lowering Masked Gather intrinsic - fixed a bug
Masked gather for vector length 2 was lowered incorrectly for element type i32: the type <2 x i32> was automatically extended to <2 x i64>, and we generated VPGATHERQQ instead of VPGATHERQD. The type <2 x float> is extended to <4 x float>, so there is no bug for that type, but the resulting sequence could be more optimal. This patch fixes the <2 x i32> bug and optimizes the <2 x float> sequence for gathers only; the same fix should be done for scatters as well.

Differential revision: https://reviews.llvm.org/D34343

llvm-svn: 305987
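As an illustration (not part of the original commit message), the kind of IR that hits this bug looks roughly like the sketch below; the intrinsic mangling and the function name are assumptions and may differ between LLVM versions. Before this change the <2 x i32> result was widened to <2 x i64> and selected as VPGATHERQQ (loading q-words); with the fix, selection should produce VPGATHERQD.

; Hedged sketch of a <2 x i32> masked gather (name and mangling are illustrative).
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)

define <2 x i32> @gather_v2i32(<2 x i32*> %ptrs, <2 x i1> %mask, <2 x i32> %passthru) {
  ; Gathers two i32 values through a vector of pointers, with 4-byte alignment.
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 4,
                                                  <2 x i1> %mask, <2 x i32> %passthru)
  ret <2 x i32> %res
}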
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp       28
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        52
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h          18
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td           2
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td   12
5 files changed, 103 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2a1633de0a2..3c4589ab18f 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -204,6 +204,11 @@ namespace {
bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
+ template <class GatherScatterSDNode>
+ bool selectAddrOfGatherScatterNode(GatherScatterSDNode *Parent, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool selectMOV64Imm32(SDValue N, SDValue &Imm);
bool selectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -1415,13 +1420,10 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index,
- SDValue &Disp, SDValue &Segment) {
-
- MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
- if (!Mgs)
- return false;
+template <class GatherScatterSDNode>
+bool X86DAGToDAGISel::selectAddrOfGatherScatterNode(
+ GatherScatterSDNode *Mgs, SDValue N, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
@@ -1453,6 +1455,18 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
+bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent))
+ return selectAddrOfGatherScatterNode<MaskedGatherScatterSDNode>(
+ Mgs, N, Base, Scale, Index, Disp, Segment);
+ if (auto X86Gather = dyn_cast<X86MaskedGatherSDNode>(Parent))
+ return selectAddrOfGatherScatterNode<X86MaskedGatherSDNode>(
+ X86Gather, N, Base, Scale, Index, Disp, Segment);
+ return false;
+}
+
/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6d6c44bbbca..6487b4aa626 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23708,6 +23708,57 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SDValue RetOps[] = {Exract, NewGather.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
+ if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
+ // There is a special case when the return type v2i32 is illegal and
+ // the type legalizer extended it to v2i64. Without this conversion we end
+ // up with VPGATHERQQ (reading q-words from memory) instead of VPGATHERQD.
+ // In order to avoid this situation, we'll build an X86 specific Gather node
+ // with index v2i64 and value type v4i32.
+ assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
+ "Unexpected type in masked gather");
+ Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
+ DAG.getBitcast(MVT::v4i32, Src0),
+ DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
+ // The mask should match the destination type. Extending the mask with
+ // zeroes is not necessary since the instruction itself reads only two
+ // values from memory.
+ Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
+ NewGather.getValue(0), DAG);
+ SDValue RetOps[] = { Sext, NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+ if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) {
+ // This transformation is for optimization only.
+ // The type legalizer extended the mask and index to a 4-element vector
+ // in order to match the requirements of the common gather node - the same
+ // vector width for index and value. The X86 gather node allows a mismatch
+ // of vector widths in order to select a more optimal instruction in the
+ // end.
+ assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 &&
+ "Unexpected type in masked gather");
+ if (Mask.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
+ Index.getOpcode() == ISD::CONCAT_VECTORS &&
+ Index.getOperand(1).isUndef()) {
+ Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
+ Index = Index.getOperand(0);
+ } else
+ return Op;
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+
+ }
return Op;
}
@@ -24511,6 +24562,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
+ case X86ISD::MGATHER: return "X86ISD::MGATHER";
}
return nullptr;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f51b6641db2..dd0f84cf7b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -615,7 +615,10 @@ namespace llvm {
// Vector truncating store with unsigned/signed saturation
VTRUNCSTOREUS, VTRUNCSTORES,
// Vector truncating masked store with unsigned/signed saturation
- VMTRUNCSTOREUS, VMTRUNCSTORES
+ VMTRUNCSTOREUS, VMTRUNCSTORES,
+
+ // X86 specific gather
+ MGATHER
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -1397,6 +1400,19 @@ namespace llvm {
}
};
+ // X86 specific Gather node.
+ class X86MaskedGatherSDNode : public MaskedGatherScatterSDNode {
+ public:
+ X86MaskedGatherSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, MMO)
+ {}
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::MGATHER;
+ }
+ };
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2620679df25..d46262573f7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7869,7 +7869,7 @@ let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mgatherv2i64>, EVEX_V128;
+ vx64xmem, X86mgatherv2i64>, EVEX_V128;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index c28b35b2297..1cd6320a55e 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -773,6 +773,9 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -796,6 +799,15 @@ def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
Mgt->getBasePtr().getValueType() == MVT::v2i64);
return false;
}]>;
+def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v2i64) &&
+ (Mgt->getMemoryVT() == MVT::v2i32 ||
+ Mgt->getMemoryVT() == MVT::v2f32);
+ return false;
+}]>;
def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
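Not part of the original commit: a sketch of how the selection change could be checked with an llc/FileCheck test. The RUN line, triple, and CHECK expectations are assumptions, and the exact instruction operands depend on the target and register allocation.

; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl | FileCheck %s
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)

define <2 x i32> @test_gather_v2i32(<2 x i32*> %ptrs, <2 x i1> %mask, <2 x i32> %src0) {
; CHECK-LABEL: test_gather_v2i32:
; CHECK:       vpgatherqd
; CHECK-NOT:   vpgatherqq
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 4,
                                                  <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32> %res
}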