summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td  2
-rw-r--r--  llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll  28
2 files changed, 16 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index def35bbef63..329ea0c793b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -796,6 +796,7 @@ defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
@@ -821,6 +822,7 @@ def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+}
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
index fafd796d299..bb9a342ae9a 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -8,17 +8,17 @@
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $0, 16(%rdi), %zmm0, %zmm1
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovupd 16(%rdi), %ymm0
+; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $0, 16(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT: vmovupd 16(%eax), %ymm0
+; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm1
+; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
%ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
@@ -35,19 +35,19 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $0, 32(%rdi), %zmm0, %zmm1
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovupd 32(%rdi), %ymm0
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $0, 32(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT: vmovupd 32(%eax), %ymm0
+; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm1, %ymm1
+; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
%ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
OpenPOWER on IntegriCloud