summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chandler Carruth <chandlerc@gmail.com> 2015-02-04 10:58:53 +0000
committer Chandler Carruth <chandlerc@gmail.com> 2015-02-04 10:58:53 +0000
commit 4d31f58c8833427bb51a49c48868f4ce46fe5fa9 (patch)
tree 906b8577e4d0f5639030a937c83e2355fe03f4d2
parent 9180f96cfe0aa1157cc76961f75a513c9f2f47e5 (diff)
download bcm5719-llvm-4d31f58c8833427bb51a49c48868f4ce46fe5fa9.tar.gz
bcm5719-llvm-4d31f58c8833427bb51a49c48868f4ce46fe5fa9.zip
[x86] Give movss and movsd execution domains in the x86 backend.
This associates movss and movsd with the packed single and packed double execution domains, respectively. While this is largely cosmetic, as we no longer get weird ping-ponging between single and double precision, it is also useful because it prevents the domain fixing algorithm from seeing domain breaks that don't actually exist. It will also be much more important if we ever have a default execution domain other than packed single, as that would cause us to mix movss and movsd with integer vector code on a regular basis, a very bad mixture.
llvm-svn: 228135
-rw-r--r-- llvm/lib/Target/X86/X86InstrFormats.td 5
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 34
-rw-r--r-- llvm/test/CodeGen/X86/sse2.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/uint_to_fp-2.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-blend.ll 40
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll 38
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining.ll 8
-rw-r--r-- llvm/test/CodeGen/X86/vselect-2.ll 4
-rw-r--r-- llvm/test/CodeGen/X86/vselect.ll 6
11 files changed, 75 insertions, 68 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index ba2387823f5..f0a8ce982d0 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -422,8 +422,9 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = NoItinerary>
- : I<o, F, outs, ins, asm, pattern, itin> {
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 9f6ac169aca..3e287ccc74b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -548,13 +548,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
- string asm_opr> {
+ string asm_opr, Domain d = GenericDomain> {
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
+ IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
@@ -565,49 +565,55 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, string OpcodeStr> {
+ X86MemOperand x86memop, string OpcodeStr,
+ Domain d = GenericDomain> {
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
VEX_4V, VEX_LIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}">;
+ "\t{$src2, $dst|$dst, $src2}", d>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> {
+ PatFrag mem_pat, string OpcodeStr,
+ Domain d = GenericDomain> {
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
}
-defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
-defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
+ SSEPackedSingle>, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
+ SSEPackedDouble>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
+ SSEPackedSingle>, XS;
let AddedComplexity = 20 in
- defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
+ SSEPackedDouble>, XD;
}
// Patterns
diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll
index ee5a47c00c5..92411a18129 100644
--- a/llvm/test/CodeGen/X86/sse2.ll
+++ b/llvm/test/CodeGen/X86/sse2.ll
@@ -179,7 +179,7 @@ define void @test12() nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: movapd 0, %xmm0
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: xorpd %xmm2, %xmm2
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
diff --git a/llvm/test/CodeGen/X86/uint_to_fp-2.ll b/llvm/test/CodeGen/X86/uint_to_fp-2.ll
index e47f15453ed..4b594f7c62a 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp-2.ll
@@ -7,7 +7,7 @@ define float @test1(i32 %x) nounwind readnone {
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movsd .LCPI0_0, %xmm0
; CHECK-NEXT: movd {{[0-9]+}}(%esp), %xmm1
-; CHECK-NEXT: orps %xmm0, %xmm1
+; CHECK-NEXT: orpd %xmm0, %xmm1
; CHECK-NEXT: subsd %xmm0, %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 39b2192a4f2..0a7114b5510 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -168,13 +168,13 @@ define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@@ -195,13 +195,13 @@ define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@@ -362,9 +362,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movaps %xmm4, %xmm0
+; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movaps %xmm6, %xmm2
+; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
@@ -372,9 +372,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movaps %xmm4, %xmm0
+; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movaps %xmm6, %xmm2
+; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@@ -401,9 +401,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movaps %xmm4, %xmm0
+; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movaps %xmm6, %xmm2
+; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
@@ -411,9 +411,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movaps %xmm4, %xmm0
+; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movaps %xmm6, %xmm2
+; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@@ -446,16 +446,16 @@ define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movaps %xmm3, %xmm1
+; SSSE3-NEXT: movapd %xmm2, %xmm0
+; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@@ -558,14 +558,14 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movaps %xmm3, %xmm1
+; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@@ -744,13 +744,13 @@ define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double>
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 710c9dd5f06..bf982ea1934 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -212,19 +212,19 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@@ -300,19 +300,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@@ -336,19 +336,19 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; SSE3-NEXT: movaps %xmm2, %xmm0
+; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -523,19 +523,19 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
@@ -692,19 +692,19 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
-; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
-; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
-; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
@@ -779,19 +779,19 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
-; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
-; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
-; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
@@ -991,7 +991,7 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v2f64:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4c6641cc6da..02e80125841 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1467,7 +1467,7 @@ define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_lo_v4f32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v4f32:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 11a9da1fe48..1a3ac07fc3b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -801,7 +801,7 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
-; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; ALL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index ecc9b6e2c59..184542ce270 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2124,13 +2124,13 @@ define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@@ -2309,13 +2309,13 @@ define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:
diff --git a/llvm/test/CodeGen/X86/vselect-2.ll b/llvm/test/CodeGen/X86/vselect-2.ll
index 8ea7580d76f..fe4cfba08b8 100644
--- a/llvm/test/CodeGen/X86/vselect-2.ll
+++ b/llvm/test/CodeGen/X86/vselect-2.ll
@@ -5,7 +5,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test1:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
@@ -34,7 +34,7 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
; SSE2-LABEL: test3:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index e8e4cc25e69..c1e9329859c 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -18,7 +18,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -226,7 +226,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test24:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -236,7 +236,7 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test25:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
OpenPOWER on IntegriCloud