author    | Simon Pilgrim <llvm-dev@redking.me.uk> | 2020-01-14 11:41:26 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2020-01-14 11:41:47 +0000
commit    | fd42a4ac7a69adb92f87c7fa927509f177dcc6ca (patch)
tree      | 261bcf0821c8072ac5c17e48c7908e09b0157519
parent    | a43b0065c5c78eba3fb83881fb628f5b8182db64 (diff)
[X86][SSE] Add add(shl(and(x,c1),c2),c3) test case with non-uniform shift value
As mentioned by @nikic on rGef5debac4302, we should merge the "guaranteed top zero bits from the shifted value" handling and the "minimum shift amount" handling so that both can set the high bits to zero.
-rw-r--r-- | llvm/test/CodeGen/X86/combine-shl.ll | 37 |
1 file changed, 37 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 0fb4d67fd88..ae31dc41e34 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -865,6 +865,43 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
   ret <4 x i32> %2
 }
 
+define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
+; SSE2-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4,8,16,32]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm2, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
+; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
+  %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
+  %3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
+  ret <4 x i32> %3
+}
+
 define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
 ; SSE2-LABEL: combine_vec_add_shuffle_shl:
 ; SSE2:       # %bb.0:
```
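For context on the fold the new test is aimed at: the `and` with 4294901760 (0xFFFF0000) guarantees the low 16 bits of the shifted value are zero, and the non-uniform `shl` contributes at least its minimum per-lane shift amount (2) on top of that, so the `add` of 15 only ever touches known-zero bits and can be emitted as an `or` (the SSE41 output above already does this via `por`, while the AVX path still uses `vpaddd`). The snippet below is a minimal standalone sketch of that known-bits reasoning, not LLVM's actual computeKnownBits/DAG-combine code; the helpers `knownZeroLowBits` and `canFoldAddToOr` are hypothetical names used only for illustration.

```cpp
// Standalone sketch (assumption: the fold of interest is add -> or, justified by
// known-zero low bits). Not LLVM code; names are illustrative only.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

// Known-zero low bits of (x & Mask) << ShAmt for one lane: the mask's trailing
// zeros plus the shift amount (capped at the bit width).
static unsigned knownZeroLowBits(uint32_t Mask, unsigned ShAmt) {
  unsigned MaskTZ = Mask == 0 ? 32 : __builtin_ctz(Mask);
  return std::min(32u, MaskTZ + ShAmt);
}

// add(V, C) == or(V, C) when every set bit of C falls in a known-zero bit of V.
// For a vector shift, only the *minimum* per-lane shift amount can be assumed.
static bool canFoldAddToOr(uint32_t Mask, const std::array<unsigned, 4> &ShAmts,
                           uint32_t C) {
  unsigned MinShAmt = *std::min_element(ShAmts.begin(), ShAmts.end());
  unsigned ZeroBits = knownZeroLowBits(Mask, MinShAmt);
  return ZeroBits >= 32 || (C >> ZeroBits) == 0;
}

int main() {
  // Mirrors the new test: mask 0xFFFF0000, shl by <2,3,4,5>, add 15.
  // 16 zero bits from the mask + min shift of 2 = 18 known-zero low bits,
  // so adding 15 (only the low 4 bits set) is the same as or'ing 15.
  assert(canFoldAddToOr(0xFFFF0000u, {2, 3, 4, 5}, 15));
  // Without the mask, only 2 low bits are known zero and the fold is unjustified.
  assert(!canFoldAddToOr(0xFFFFFFFFu, {2, 3, 4, 5}, 15));
  return 0;
}
```

The sketch builds with any C++11 compiler that provides the GCC/Clang `__builtin_ctz` intrinsic; the point is only that merging the mask-derived zero bits with the minimum shift amount proves more zero bits than either source alone.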