author     Simon Pilgrim <llvm-dev@redking.me.uk>    2020-01-14 11:41:26 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2020-01-14 11:41:47 +0000
commit     fd42a4ac7a69adb92f87c7fa927509f177dcc6ca (patch)
tree       261bcf0821c8072ac5c17e48c7908e09b0157519
parent     a43b0065c5c78eba3fb83881fb628f5b8182db64 (diff)
download   bcm5719-llvm-fd42a4ac7a69adb92f87c7fa927509f177dcc6ca.tar.gz
           bcm5719-llvm-fd42a4ac7a69adb92f87c7fa927509f177dcc6ca.zip
[X86][SSE] Add add(shl(and(x,c1),c2),c3) test case with non-uniform shift value
As mentioned by @nikic on rGef5debac4302, we should merge the "guaranteed top zero bits from the shifted value" and "minimum shift amount" code paths so that both can contribute to setting the high bits to zero.
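
For context (not part of the patch): in this test the AND mask 0xFFFF0000 already clears the low 16 bits of every lane, and the smallest shift amount element is 2, so each lane of the shl result has at least its low 18 bits zero. Adding 15 can therefore never carry, which is why the add should become foldable to an or once the known-bits code combines both sources of information. A minimal IR sketch of that equivalence, using a hypothetical function name chosen here for illustration:

define <4 x i32> @add_vs_or_equivalent(<4 x i32> %a0) {
  ; %m has its low 16 bits zero in every lane, so %s has at least its
  ; low 18 bits zero; or-ing in 15 gives the same result as the add below.
  %m = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %s = shl <4 x i32> %m, <i32 2, i32 3, i32 4, i32 5>
  %r = or <4 x i32> %s, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %r
}

The SSE41 codegen in the diff already exploits this (por instead of padd), while the AVX vpsllvd path still emits vpaddd.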
-rw-r--r--  llvm/test/CodeGen/X86/combine-shl.ll | 37
1 file changed, 37 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 0fb4d67fd88..ae31dc41e34 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -865,6 +865,43 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
ret <4 x i32> %2
}
+define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
+; SSE2-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,8,16,32]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
+; SSE41-NEXT: por {{.*}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
+ %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
+ %3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
+ ret <4 x i32> %3
+}
+
define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shuffle_shl:
; SSE2: # %bb.0: