summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td12
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td39
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll16
-rw-r--r--llvm/test/MC/X86/avx512-encodings.s4
4 files changed, 66 insertions, 5 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 018aa9a87d0..5f8ae27839c 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3184,6 +3184,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrNoMem]>;
}
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+}
+
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7328d992c1f..5284c3f6510 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4461,9 +4461,9 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
(memopv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_valign<string Suffix, RegisterClass RC,
- X86MemOperand x86memop, ValueType IntVT,
- ValueType FloatVT> {
+multiclass avx512_valign<string Suffix, RegisterClass RC, RegisterClass KRC,
+ RegisterClass MRC, X86MemOperand x86memop,
+ ValueType IntVT, ValueType FloatVT> {
def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat("valign"##Suffix,
@@ -4473,10 +4473,39 @@ multiclass avx512_valign<string Suffix, RegisterClass RC,
(IntVT (X86VAlign RC:$src2, RC:$src1,
(i8 imm:$src3))))]>, EVEX_4V;
+ let Constraints = "$src0 = $dst", AddedComplexity=30 in
+ def rrik : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat("valign"##Suffix,
+ " \t{$src3, $src2, $src1, $mask, $dst|"
+ "$dst, $mask, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntVT (vselect KRC:$mask,
+ (X86VAlign RC:$src2, RC:$src1,
+ (i8 imm:$src3)),
+ RC:$src0)))]>,
+ EVEX_4V, EVEX_K;
+
// Also match valign of packed floats.
def : Pat<(FloatVT (X86VAlign RC:$src1, RC:$src2, (i8 imm:$imm))),
(!cast<Instruction>(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>;
+ // Non-masking intrinsic call.
+ def : Pat<(IntVT
+ (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+ RC:$src1, RC:$src2, imm:$src3,
+ (IntVT (bitconvert (v16i32 immAllZerosV))), -1)),
+ (!cast<Instruction>(NAME#rri) RC:$src1, RC:$src2, imm:$src3)>;
+
+ // Masking intrinsic call.
+ def : Pat<(IntVT
+ (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+ RC:$src1, RC:$src2, imm:$src3,
+ RC:$src4, MRC:$mask)),
+ (!cast<Instruction>(NAME#rrik) RC:$src4,
+ (COPY_TO_REGCLASS MRC:$mask, KRC), RC:$src1,
+ RC:$src2, imm:$src3)>;
+
let mayLoad = 1 in
def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3),
@@ -4485,9 +4514,9 @@ multiclass avx512_valign<string Suffix, RegisterClass RC,
"$dst, $src1, $src2, $src3}"),
[]>, EVEX_4V;
}
-defm VALIGND : avx512_valign<"d", VR512, i512mem, v16i32, v16f32>,
+defm VALIGND : avx512_valign<"d", VR512, VK16WM, GR16, i512mem, v16i32, v16f32>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_valign<"q", VR512, i512mem, v8i64, v8f64>,
+defm VALIGNQ : avx512_valign<"q", VR512, VK8WM, GR8, i512mem, v8i64, v8f64>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Helper fragments to match sext vXi1 to vXiY.
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6f34d4596f9..eaaf915b0cb 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -611,3 +611,19 @@ define <8 x i64> @test_vmovntdqa(i8 *%x) {
}
declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
+
+define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
+; CHECK-LABEL: test_mask_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %k1, %zmm2
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index e71b83cfd30..e96056d7973 100644
--- a/llvm/test/MC/X86/avx512-encodings.s
+++ b/llvm/test/MC/X86/avx512-encodings.s
@@ -3799,3 +3799,7 @@ vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3}
// CHECK: vpermt2d
// CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00]
vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z}
+
+// CHECK: valignq
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x03,0x4c,0x24,0x04,0x02]
+valignq $2, 0x100(%rsp), %zmm0, %zmm1
OpenPOWER on IntegriCloud