summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td72
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx512-select.ll2
3 files changed, 45 insertions, 31 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 07e84c3a17c..4ba09b2adf6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3025,39 +3025,53 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode,
X86VectorVTInfo _> {
- defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2),
- asm, "$src2, $src1","$src1, $src2",
- (_.VT (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2))),
- IIC_SSE_MOV_S_RR>, EVEX_4V;
- let Constraints = "$src1 = $dst" in
- defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
- (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src),
- asm,"$src","$src",
- (_.VT (OpNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector
- (_.ScalarLdFrag addr:$src)))))>, EVEX;
- let isCodeGenOnly = 1 in {
- def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.FRC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
- (scalar_to_vector _.FRC:$src2))))],
- _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
- def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
- _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.FRC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+ def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
+ "$dst {${mask}} {z}, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ _.ImmAllZerosV)))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT _.RC:$src0))))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
+ def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ let mayLoad = 1, hasSideEffects = 0 in {
+ let Constraints = "$src0 = $dst" in
+ def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}}|",
+ "$dst {${mask}}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
+ def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+ "$dst {${mask}} {z}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
}
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
EVEX;
- let mayStore = 1 in
+ let mayStore = 1, hasSideEffects = 0 in
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
@@ -3071,11 +3085,11 @@ defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
- (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+ (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
- (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+ (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index cab1aae1142..cc5b7916c0b 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5413,7 +5413,7 @@ define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
ret <2 x double> %res
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index fab6b3dda24..d4b9c19202e 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -52,7 +52,7 @@ define double @select03(double %a, double %b, double %c, double %eps) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplesd %xmm0, %xmm3, %k1
; CHECK-NEXT: vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double %c, double %b
OpenPOWER on IntegriCloud