summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-12-15 17:09:24 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-12-15 17:09:24 +0000
commit: 7522f54feb3baf9cf347ba48fdf7923981206743 (patch)
tree: b979f1fd34dbd6e84c2cd6c1184e353b5b96cc85
parent: 6ea759a83eb4b3dabf1d5c310386d1e44069e283 (diff)
download: bcm5719-llvm-7522f54feb3baf9cf347ba48fdf7923981206743.tar.gz
          bcm5719-llvm-7522f54feb3baf9cf347ba48fdf7923981206743.zip
[X86][SSE] Fix domains for scalar store instructions
As discussed on D27692.

llvm-svn: 289834
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp | 4
-rw-r--r-- llvm/test/CodeGen/X86/masked_memop.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/oddshuffles.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/vec_ins_extract-1.ll | 8
4 files changed, 11 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a33354b1886..e87fdbe8e59 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8457,6 +8457,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
{ X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
+ { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
{ X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
{ X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
@@ -8475,6 +8476,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
{ X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
+ { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
{ X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
{ X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
@@ -8498,6 +8500,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
{ X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
+ { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
+ { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
{ X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
{ X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 7e1837c42a9..f3b3d933982 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -979,12 +979,12 @@ define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
define void @one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: one_mask_bit_set1:
; AVX: ## BB#0:
-; AVX-NEXT: vmovd %xmm0, (%rdi)
+; AVX-NEXT: vmovss %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: one_mask_bit_set1:
; AVX512: ## BB#0:
-; AVX512-NEXT: vmovd %xmm0, (%rdi)
+; AVX512-NEXT: vmovss %xmm0, (%rdi)
; AVX512-NEXT: retq
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
ret void
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 89d5c4eb1e4..48869f186fe 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -359,7 +359,7 @@ define void @v7i32(<4 x i32> %a, <4 x i32> %b, <7 x i32>* %p) nounwind {
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,2]
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,3]
-; AVX1-NEXT: vmovd %xmm1, 24(%rdi)
+; AVX1-NEXT: vmovss %xmm1, 24(%rdi)
; AVX1-NEXT: vmovlps %xmm0, 16(%rdi)
; AVX1-NEXT: vmovaps %xmm2, (%rdi)
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_ins_extract-1.ll b/llvm/test/CodeGen/X86/vec_ins_extract-1.ll
index 8adc0e61f8a..85c7875d923 100644
--- a/llvm/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/llvm/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -90,8 +90,8 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-16, %esp
; X32-NEXT: subl $32, %esp
-; X32-NEXT: movdqa %xmm0, (%esp)
-; X32-NEXT: movd %xmm0, (%esp,%eax,4)
+; X32-NEXT: movaps %xmm0, (%esp)
+; X32-NEXT: movss %xmm0, (%esp,%eax,4)
; X32-NEXT: movaps (%esp), %xmm0
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
@@ -99,9 +99,9 @@ define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
;
; X64-LABEL: t3:
; X64: # BB#0:
-; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movslq %edi, %rax
-; X64-NEXT: movd %xmm0, -24(%rsp,%rax,4)
+; X64-NEXT: movss %xmm0, -24(%rsp,%rax,4)
; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT: retq
%t9 = extractelement <4 x i32> %t8, i32 0
OpenPOWER on IntegriCloud