summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-06-28 05:16:40 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-06-28 05:16:40 +0000
commit887c2c3482cf002a2057ddc18c23554ffa57af4a (patch)
tree884bc4510c85d65cee5ab53f83e3f23e387cd749 /llvm/test/CodeGen
parent72304efabc30503e84dc13683a734715e8a22115 (diff)
downloadbcm5719-llvm-887c2c3482cf002a2057ddc18c23554ffa57af4a.tar.gz
bcm5719-llvm-887c2c3482cf002a2057ddc18c23554ffa57af4a.zip
[x86] Add handling for splat-like widenings of v16i8 shuffles.
These show up really frequently, not the least with actual splats. =] We lowered these quite badly before. The new code path tries to widen i8 shuffles to i16 shuffles in a splat-like way. There are still some inefficiencies in our i16 splat logic though, so we aren't really done here. Also, for certain patterns (bit of a gather-and-splat) we still generate pretty silly code, and I've left a fixme for addressing it. However, I'm not actually worried about this code pattern as much. The old shuffle lowering generates a 29 instruction monstrosity for it that should execute much more slowly. llvm-svn: 211974
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll94
1 files changed, 89 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index b5e0b79088f..f3a0931f731 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -3,6 +3,93 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm2[2,1,2,3]
+; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1]
+; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,6]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
@@ -23,12 +110,9 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; CHECK-SSE2: pxor %xmm2, %xmm2
-; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,1,0,3]
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm1
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
OpenPOWER on IntegriCloud