-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td                |  2 ++
-rw-r--r--  llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll | 28 ++++++++++++++--------------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index def35bbef63..329ea0c793b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -796,6 +796,7 @@ defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
 
 // A 128-bit subvector insert to the first 512-bit vector position
 // is a subregister copy that needs no instruction.
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
 def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
 def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
@@ -821,6 +822,7 @@ def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
 def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+}
 
 // vextractps - extract 32 bits from XMM
 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
index fafd796d299..bb9a342ae9a 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -8,17 +8,17 @@
 define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_2f64_12u4:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
-; ALL-NEXT:    vinsertf64x4 $0, 16(%rdi), %zmm0, %zmm1
-; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
+; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
 ; X32-AVX512F:       # BB#0:
 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT:    vinsertf64x4 $0, 16(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
+; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
+; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT:    retl
   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
@@ -35,19 +35,19 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n
 define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_2f64_23z5:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
-; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm0, %ymm0
-; ALL-NEXT:    vinsertf64x4 $0, 32(%rdi), %zmm0, %zmm1
-; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
 ; X32-AVX512F:       # BB#0:
 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
-; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm0, %ymm0
-; X32-AVX512F-NEXT:    vinsertf64x4 $0, 32(%eax), %zmm0, %zmm1
-; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
+; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
+; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT:    retl
   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
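
The functional change is the AddedComplexity bump: when several patterns can match the same insert_subvector node, instruction selection prefers the pattern with the higher complexity, so the subregister-copy patterns above now win over the load-folding vinsertf128rm pattern. A minimal LLVM IR sketch of the shape this affects (a hypothetical reduced test, not part of this commit; the function and value names are made up):

; Hypothetical reduced example (not from this commit): an unaligned
; <4 x double> load whose value lands in the low half of an otherwise
; undef <8 x double>.  With the raised AddedComplexity, the 256-bit
; insert at position 0 is selected as a subregister copy, so the load
; itself can become a single vmovupd into a ymm register.
define <8 x double> @insert_low_half_sketch(<4 x double>* %p) nounwind {
  %v = load <4 x double>, <4 x double>* %p, align 8
  ; Widen to <8 x double>: lanes 0-3 from the load, lanes 4-7 undef.
  %r = shufflevector <4 x double> %v, <4 x double> undef,
                     <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x double> %r
}

Run through llc with -mattr=+avx512f, input like this should now begin with a plain vmovupd into %ymm0 rather than a vinsertf64x4 $0 into a zmm register, matching the updated ALL check lines above.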