summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td65
1 files changed, 35 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 05e346dec5a..8cf5bb43717 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1067,23 +1067,6 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr
SSEPackedDouble>, EVEX, EVEX_V512,
OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-// Use vmovaps/vmovups for AVX-512 integer load/store.
-// 512-bit load/store
-def : Pat<(alignedloadv8i64 addr:$src),
- (VMOVAPSZrm addr:$src)>;
-def : Pat<(loadv8i64 addr:$src),
- (VMOVUPSZrm addr:$src)>;
-
-def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
- (VMOVAPSZmr addr:$dst, VR512:$src)>;
-def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
- (VMOVAPSZmr addr:$dst, VR512:$src)>;
-
-def : Pat<(store (v8i64 VR512:$src), addr:$dst),
- (VMOVUPDZmr addr:$dst, VR512:$src)>;
-def : Pat<(store (v16i32 VR512:$src), addr:$dst),
- (VMOVUPSZmr addr:$dst, VR512:$src)>;
-
let neverHasSideEffects = 1 in {
def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
(ins VR512:$src),
@@ -1115,25 +1098,36 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
}
}
-multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
- RegisterClass KRC,
+// 512-bit aligned load/store
+def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>;
+def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
+
+def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQA64mr addr:$dst, VR512:$src)>;
+def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQA32mr addr:$dst, VR512:$src)>;
+
+multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
+ RegisterClass RC, RegisterClass KRC,
PatFrag ld_frag, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in
- def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>,
- EVEX;
+ def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
let canFoldAsLoad = 1 in
- def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))]>,
- EVEX;
+ def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
+let mayStore = 1 in
+ def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, VR512:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
let Constraints = "$src1 = $dst" in {
- def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst),
+ def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2),
!strconcat(asm,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
EVEX, EVEX_K;
- def rmk : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst),
+ def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86memop:$src2),
!strconcat(asm,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
@@ -1141,11 +1135,22 @@ let Constraints = "$src1 = $dst" in {
}
}
-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
+defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
+ memopv16i32, i512mem>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
+defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
+ memopv8i64, i512mem>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+// 512-bit unaligned load/store
+def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>;
+def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>;
+
+def : Pat<(store (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQU64mr addr:$dst, VR512:$src)>;
+def : Pat<(store (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQU32mr addr:$dst, VR512:$src)>;
+
let AddedComplexity = 20 in {
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
(v16f32 VR512:$src2))),
OpenPOWER on IntegriCloud