author    Elena Demikhovsky <elena.demikhovsky@intel.com>    2015-02-25 09:46:31 +0000
committer Elena Demikhovsky <elena.demikhovsky@intel.com>    2015-02-25 09:46:31 +0000
commit    56eadcf5ce7bb1cc5d3a4d25f40d73c882ef9087 (patch)
tree      7a286df920ed74eab31897c4d5dc3d3303a955c9 /llvm/lib
parent    3eff5f46d7821f2fa8529c93155f70f717cb71ee (diff)
AVX-512: Gather and Scatter patterns
Gather and scatter instructions additionally write to one of their source operands, the mask register. A gather therefore has two destination values: the loaded value and the updated mask. Until now we did not support a code-gen pattern for gather; the instruction was generated from the intrinsic only, and the machine node was hardcoded. Once the masked_gather node is introduced, we need to select the instruction automatically, in the standard way. I added a flag "hasTwoExplicitDefs" that allows handling two destination operands.

(Some code in X86InstrFragmentsSIMD.td is commented out, just to split one big patch into many small ones.)

llvm-svn: 230471
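For context, a minimal sketch of the IR these patterns are ultimately meant to select, once the masked_gather node is wired up and the commented-out predicates below are enabled. The intrinsic signature shown follows the masked-vector-intrinsic form documented around this period and is an assumption; it may not match this exact revision.

; Hedged sketch: IR that a pattern such as VGATHERDPSZ is intended to match.
; The gather produces both the result vector and the updated writemask
; ($mask_wb in the TableGen definitions below).
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 %align,
                                                <16 x i1> %mask,
                                                <16 x float> %passthru)

define <16 x float> @gather_f32(<16 x float*> %ptrs, <16 x i1> %mask,
                                <16 x float> %passthru) {
  %g = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4,
                                                    <16 x i1> %mask,
                                                    <16 x float> %passthru)
  ret <16 x float> %g
}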
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td        | 95
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 54
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td          |  3
3 files changed, 108 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0b2392b9bce..4923bc5f1dd 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4919,74 +4919,81 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
-multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand memop> {
-let mayLoad = 1,
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86MemOperand memop, PatFrag GatherNode> {
+let mayLoad = 1, hasTwoExplicitDefs = 1,
Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
- (ins RC:$src1, KRC:$mask, memop:$src2),
+ def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
+ (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
+ [(set _.RC:$dst, _.KRCWM:$mask_wb,
+ (_.VT (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
+ vectoraddr:$src2)))]>, EVEX, EVEX_K,
+ EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
+ mgatherv8i32>, EVEX_V512, VEX_W;
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
+ mgatherv8i64>, EVEX_V512, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
+ mgatherv16i32>, EVEX_V512;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
+ mgatherv8i64>, EVEX_V512;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
+ mgatherv8i32>, EVEX_V512, VEX_W;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
+ mgatherv16i32>, EVEX_V512;
+
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
+ mgatherv8i64>, EVEX_V512, VEX_W;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
+ mgatherv8i64>, EVEX_V512;
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86MemOperand memop, PatFrag ScatterNode> {
-multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand memop> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
- def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
- (ins memop:$dst, KRC:$mask, RC:$src2),
+
+ def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
+ (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
+ _.KRCWM:$mask, vectoraddr:$dst))]>,
+ EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
+ mscatterv8i32>, EVEX_V512, VEX_W;
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
+ mscatterv8i64>, EVEX_V512, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
+ mscatterv16i32>, EVEX_V512;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
+ mscatterv8i64>, EVEX_V512;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
+ mscatterv8i32>, EVEX_V512, VEX_W;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
+ mscatterv16i32>, EVEX_V512;
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
+ mscatterv8i64>, EVEX_V512, VEX_W;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
+ mscatterv8i64>, EVEX_V512;
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index a80843fc48f..bf515a847db 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -304,6 +304,8 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
+def X86mgather : SDNode<"X86ISD::GATHER", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -524,6 +526,58 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v8i32);
+ //return false;
+ return N != 0;
+}]>;
+
+def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v8i64 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v8i64);
+ //return false;
+ return N != 0;
+}]>;
+def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v16i32 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v16i32);
+ //return false;
+ return N != 0;
+}]>;
+
+def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v8i32 ||
+ // Sc->getBasePtr().getValueType() == MVT::v8i32);
+ //return false;
+ return N != 0;
+}]>;
+
+def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v8i64 ||
+ // Sc->getBasePtr().getValueType() == MVT::v8i64);
+ //return false;
+ return N != 0;
+}]>;
+def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v16i32 ||
+ // Sc->getBasePtr().getValueType() == MVT::v16i32);
+ //return false;
+ return N != 0;
+}]>;
+
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 7ab8822ff31..9881cafa84d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -713,6 +713,9 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def vectoraddr : ComplexPattern<iPTR, 5, "SelectAddr", [],[SDNPWantParent]>;
+//def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;