summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td4
-rw-r--r--llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll8
3 files changed, 9 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f94ba848b3d..ca79f320611 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3552,6 +3552,8 @@ let Predicates = [HasAVX512] in {
}
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d2dfb1b5d7d..ec17ae083ad 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4822,6 +4822,8 @@ let Predicates = [UseAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -4851,6 +4853,8 @@ let Predicates = [UseSSE2] in {
(MOV64toPQIrr GR64:$src)>;
}
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 82df614a9c5..e498eb78337 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -1137,18 +1137,16 @@ define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwt
; Extension tests.
;
-; FIXME: PR31309
+; PR31309
define <4 x i32> @load_i32_zext_i128_v4i32(i32* %ptr) {
; SSE-LABEL: load_i32_zext_i128_v4i32:
; SSE: # BB#0:
-; SSE-NEXT: movl (%rdi), %eax
-; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: load_i32_zext_i128_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: load_i32_zext_i128_v4i32:
OpenPOWER on IntegriCloud