| author | Chandler Carruth <chandlerc@gmail.com> | 2014-09-15 12:40:54 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-09-15 12:40:54 +0000 |
| commit | 707a2e098dd6b30ebf7d6246360c794296eb0ad9 | |
| tree | 2c65e45ef58f991b02a68b32bb4e999d712e5161 /llvm/test | |
| parent | 1009df42c0f8089e3f7c13d37a9b1d861710729e | |
[x86] Begin emitting PBLENDW instructions for integer blend operations
when SSE4.1 is available.
This removes a ton of domain crossing from integer blend code paths that
were previously ending up in the floating point domain.
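
As a rough illustration (a hypothetical test, not one from this patch; the RUN line below is an assumption, and the actual test files enable the then-experimental shuffle lowering with additional flags not shown here), a v2i64 blend like this should now select the integer-domain pblendw instead of the floating-point-domain blendpd:

```llvm
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s

; Take element 0 from %a and element 1 from %b: a pure per-element blend,
; which maps onto a single pblendw with an immediate mask.
define <2 x i64> @blend_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: blend_v2i64_03
; CHECK: pblendw
; CHECK-NEXT: retq
  %blend = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %blend
}
```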
This is just the tip of the iceberg though. The real switch is for
integer blend lowering to more actively rely on this instruction being
available so we don't hit shufps at all any longer. =] That will come in
a follow-up patch.
Another place where we need better support is using PBLENDVB when doing so
avoids the need for two complementary PSHUFB masks.
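
For example (a hypothetical case, not covered by this patch), a constant byte-interleaving blend like the one below cannot be expressed as a word-granularity pblendw; today it takes two complementary pshufb masks combined with a por, whereas a single PBLENDVB against a constant byte mask would suffice:

```llvm
; Hypothetical sketch: every even byte comes from %a and every odd byte from
; %b, so each 16-bit word mixes both inputs and pblendw cannot be used.
define <16 x i8> @blend_v16i8_even_odd(<16 x i8> %a, <16 x i8> %b) {
  %blend = shufflevector <16 x i8> %a, <16 x i8> %b,
      <16 x i32> <i32 0,  i32 17, i32 2,  i32 19,
                  i32 4,  i32 21, i32 6,  i32 23,
                  i32 8,  i32 25, i32 10, i32 27,
                  i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %blend
}
```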
llvm-svn: 217767
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 14 |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 20 |
2 files changed, 17 insertions, 17 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index f6382a98559..d23b0794305 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -170,7 +170,7 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
 ; SSE3-NEXT: retq
 ;
 ; SSE41-LABEL: @shuffle_v2i64_03
-; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE41: pblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
@@ -187,8 +187,8 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 ; SSE3-NEXT: retq
 ;
 ; SSE41-LABEL: @shuffle_v2i64_03_copy
-; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
 ; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
@@ -251,8 +251,8 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
 ; SSE3-NEXT: retq
 ;
 ; SSE41-LABEL: @shuffle_v2i64_21
-; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
 ; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
   ret <2 x i64> %shuffle
@@ -269,8 +269,8 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 ; SSE3-NEXT: retq
 ;
 ; SSE41-LABEL: @shuffle_v2i64_21_copy
-; SSE41: blendpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41: pblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
 ; SSE41-NEXT: retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
   ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index affa933768b..757ef8bf176 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -40,7 +40,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0300
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
@@ -282,7 +282,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -293,7 +293,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
@@ -305,7 +305,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
@@ -317,7 +317,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
@@ -335,9 +335,9 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0451
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
@@ -355,9 +355,9 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_4015
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
@@ -369,7 +369,7 @@ define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq

