summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp14
-rw-r--r--llvm/test/CodeGen/X86/coalesce_commute_movsd.ll2
-rw-r--r--llvm/test/CodeGen/X86/combine-sdiv.ll8
-rw-r--r--llvm/test/CodeGen/X86/palignr.ll8
-rw-r--r--llvm/test/CodeGen/X86/psubus.ll4
-rw-r--r--llvm/test/CodeGen/X86/sdiv-exact.ll4
-rw-r--r--llvm/test/CodeGen/X86/sse-align-12.ll4
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll28
-rw-r--r--llvm/test/CodeGen/X86/swizzle-2.ll4
-rw-r--r--llvm/test/CodeGen/X86/trunc-subvector.ll6
-rw-r--r--llvm/test/CodeGen/X86/vector-blend.ll40
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll68
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll18
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll6
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll4
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining.ll8
-rw-r--r--llvm/test/CodeGen/X86/vselect-2.ll4
-rw-r--r--llvm/test/CodeGen/X86/vselect.ll8
-rw-r--r--llvm/test/CodeGen/X86/x86-shifts.ll16
19 files changed, 134 insertions, 120 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e39819f4ac6..e5da23cf29d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6438,6 +6438,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
MI.getOperand(2).getSubReg() == 0)
return 0x6;
return 0;
+ case X86::SHUFPDrri:
+ return 0x6;
}
return 0;
}
@@ -6558,6 +6560,18 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
// We must always return true for MOVHLPSrr.
if (Opcode == X86::MOVHLPSrr)
return true;
+ break;
+ case X86::SHUFPDrri: {
+ if (Domain == 1) {
+ unsigned Imm = MI.getOperand(3).getImm();
+ unsigned NewImm = 0x44;
+ if (Imm & 1) NewImm |= 0x0a;
+ if (Imm & 2) NewImm |= 0xa0;
+ MI.getOperand(3).setImm(NewImm);
+ MI.setDesc(get(X86::SHUFPSrri));
+ }
+ return true;
+ }
}
return false;
}
diff --git a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
index 31537b58131..b42fd957d7f 100644
--- a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
+++ b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
@@ -9,7 +9,7 @@
define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
; SSE2-LABEL: insert_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_f64:
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 27d49cad966..156cf375354 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -1529,7 +1529,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
@@ -1615,7 +1615,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -1753,7 +1753,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: psrlq $62, %xmm4
@@ -1764,7 +1764,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/palignr.ll b/llvm/test/CodeGen/X86/palignr.ll
index 19d493b0c77..aedd013bf25 100644
--- a/llvm/test/CodeGen/X86/palignr.ll
+++ b/llvm/test/CodeGen/X86/palignr.ll
@@ -61,8 +61,8 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test4:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retl
;
; CHECK-SSSE3-LABEL: test4:
@@ -81,8 +81,8 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-SSE-LABEL: test5:
; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; CHECK-SSE-NEXT: movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; CHECK-SSE-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE-NEXT: retl
;
; CHECK-AVX-LABEL: test5:
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 34f4bf23ced..438dce60511 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -697,7 +697,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
; SSSE3-NEXT: pshufb %xmm5, %xmm7
; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3]
; SSSE3-NEXT: psubd %xmm8, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSSE3-NEXT: pand %xmm5, %xmm4
@@ -707,7 +707,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSSE3-NEXT: pand %xmm5, %xmm1
; SSSE3-NEXT: packuswb %xmm2, %xmm1
; SSSE3-NEXT: packuswb %xmm3, %xmm1
-; SSSE3-NEXT: andnpd %xmm1, %xmm0
+; SSSE3-NEXT: pandn %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test14:
diff --git a/llvm/test/CodeGen/X86/sdiv-exact.ll b/llvm/test/CodeGen/X86/sdiv-exact.ll
index 3caaf40e718..3c238d7f1bf 100644
--- a/llvm/test/CodeGen/X86/sdiv-exact.ll
+++ b/llvm/test/CodeGen/X86/sdiv-exact.ll
@@ -82,9 +82,9 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrad $3, %xmm1
-; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145]
-; X86-NEXT: movapd %xmm1, %xmm0
+; X86-NEXT: movaps %xmm1, %xmm0
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/sse-align-12.ll b/llvm/test/CodeGen/X86/sse-align-12.ll
index 15c3cb014ab..36c30e9eb82 100644
--- a/llvm/test/CodeGen/X86/sse-align-12.ll
+++ b/llvm/test/CodeGen/X86/sse-align-12.ll
@@ -40,8 +40,8 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
define <2 x double> @c(<2 x double>* %y) nounwind {
; CHECK-LABEL: c:
; CHECK: # %bb.0:
-; CHECK-NEXT: movupd (%rdi), %xmm0
-; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT: movups (%rdi), %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 0
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index eb1190151eb..af591a6849f 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2179,9 +2179,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
-; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
-; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
+; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
+; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
@@ -2200,9 +2200,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
-; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
-; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
+; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
+; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
@@ -4728,8 +4728,8 @@ define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_shuffle_pd:
; SSE: # %bb.0:
-; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01]
-; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0]
+; SSE-NEXT: shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e]
+; SSE-NEXT: # xmm0 = xmm0[2,3],xmm1[0,1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_pd:
@@ -5650,9 +5650,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storer_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
-; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
-; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00]
+; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
+; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storer_pd:
@@ -5673,9 +5673,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
;
; X64-SSE-LABEL: test_mm_storer_pd:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
-; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
-; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07]
+; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
+; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storer_pd:
diff --git a/llvm/test/CodeGen/X86/swizzle-2.ll b/llvm/test/CodeGen/X86/swizzle-2.ll
index dad6a4d7d4f..91c18bebe6b 100644
--- a/llvm/test/CodeGen/X86/swizzle-2.ll
+++ b/llvm/test/CodeGen/X86/swizzle-2.ll
@@ -192,7 +192,7 @@ define <4 x float> @swizzle_18(<4 x float> %v) {
define <4 x float> @swizzle_19(<4 x float> %v) {
; CHECK-LABEL: swizzle_19:
; CHECK: # %bb.0:
-; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
%2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
@@ -232,7 +232,7 @@ define <4 x float> @swizzle_22(<4 x float> %v) {
define <4 x float> @swizzle_23(<4 x float> %v) {
; CHECK-LABEL: swizzle_23:
; CHECK: # %bb.0:
-; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
%2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll
index abd4fb45ea9..d52fe0f8ff8 100644
--- a/llvm/test/CodeGen/X86/trunc-subvector.ll
+++ b/llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -101,7 +101,7 @@ define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; AVX2-LABEL: test5:
@@ -216,10 +216,10 @@ define <2 x i32> @test9(<8 x i32> %v) {
define <2 x i32> @test10(<8 x i32> %v) {
; SSE2-LABEL: test10:
; SSE2: # %bb.0:
-; SSE2-NEXT: xorpd %xmm2, %xmm2
+; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; AVX2-LABEL: test10:
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 549e44471d6..c6bcd299d5f 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -149,12 +149,12 @@ entry:
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@@ -174,12 +174,12 @@ entry:
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@@ -336,16 +336,16 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double8:
@@ -371,16 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i648:
@@ -404,14 +404,14 @@ entry:
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@@ -513,13 +513,13 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@@ -695,12 +695,12 @@ entry:
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 4318e40ec66..2a5ac3ebc29 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -137,7 +137,7 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE: # %bb.0:
-; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_10:
@@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_32:
@@ -222,17 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@@ -348,17 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@@ -376,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # %bb.0:
-; SSE3-NEXT: movapd %xmm1, %xmm0
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movapd %xmm1, %xmm0
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -408,12 +408,12 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
@@ -438,14 +438,14 @@ define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3: # %bb.0:
-; SSE3-NEXT: movapd %xmm1, %xmm0
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
@@ -585,14 +585,14 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
@@ -615,14 +615,14 @@ define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3: # %bb.0:
-; SSE3-NEXT: movapd %xmm2, %xmm0
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: movaps %xmm2, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
@@ -1079,17 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE2-LABEL: insert_reg_lo_v2f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2f64:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2f64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2f64:
@@ -1268,8 +1268,8 @@ define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # %bb.0:
-; SSE-NEXT: movapd (%rdi), %xmm0
-; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_10:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 21b04ccd200..f37cd88101b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1443,14 +1443,14 @@ define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_6701:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_6701:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_6701:
@@ -1540,12 +1540,12 @@ define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_2345:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_2345:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_2345:
@@ -2255,17 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE2-LABEL: insert_reg_lo_v4f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v4f32:
; SSE3: # %bb.0:
-; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v4f32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v4f32:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 87520abb060..c756fe7d197 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -37,7 +37,7 @@ define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
@@ -1260,7 +1260,7 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
@@ -1459,7 +1459,7 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index e01f5b4f576..b5696e33b7f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -43,7 +43,7 @@ define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) {
define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) {
; SSSE3-LABEL: combine_pshufb_as_movsd:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_as_movsd:
@@ -668,7 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 2594a0a9111..58b7be1ec80 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2132,12 +2132,12 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@@ -2314,12 +2314,12 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:
diff --git a/llvm/test/CodeGen/X86/vselect-2.ll b/llvm/test/CodeGen/X86/vselect-2.ll
index 040f1d8b6f4..c751b7a7c87 100644
--- a/llvm/test/CodeGen/X86/vselect-2.ll
+++ b/llvm/test/CodeGen/X86/vselect-2.ll
@@ -7,7 +7,7 @@
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
@@ -45,7 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index 46adb4cc3ca..d89b1352aee 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -30,7 +30,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test2:
@@ -106,7 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test7:
@@ -390,7 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test24:
@@ -409,7 +409,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2: # %bb.0:
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test25:
diff --git a/llvm/test/CodeGen/X86/x86-shifts.ll b/llvm/test/CodeGen/X86/x86-shifts.ll
index 73dbb30a8c7..5688d1d28e0 100644
--- a/llvm/test/CodeGen/X86/x86-shifts.ll
+++ b/llvm/test/CodeGen/X86/x86-shifts.ll
@@ -223,10 +223,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X32-NEXT: psrlq $8, %xmm2
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $1, %xmm1
-; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; X32-NEXT: xorpd %xmm2, %xmm1
-; X32-NEXT: movapd %xmm1, %xmm0
+; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
+; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
+; X32-NEXT: xorps %xmm2, %xmm1
+; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2_nosplat:
@@ -235,10 +235,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X64-NEXT: psrlq $8, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $1, %xmm1
-; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; X64-NEXT: xorpd %xmm2, %xmm1
-; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
+; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
+; X64-NEXT: xorps %xmm2, %xmm1
+; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
entry:
%B = lshr <2 x i64> %A, < i64 8, i64 1>
OpenPOWER on IntegriCloud