| author | Craig Topper <craig.topper@intel.com> | 2018-05-21 19:27:50 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-05-21 19:27:50 +0000 |
| commit | ef313905f0afe680deb7a4d0a5fdc678d4d0a0e2 (patch) | |
| tree | 8b61efb5739b6e80d6b1407369a972bc90366cec /llvm/test/CodeGen/X86 | |
| parent | dcd7d6c33112db3618798c1c037460cac58d7f9a (diff) | |
[X86] Add test cases for missed vector rotate matching due to SimplifyDemandedBits interfering with the AND masks
As requested in D47116
llvm-svn: 332869
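For context, the IR pattern these tests exercise is the usual variable-amount rotate idiom with the amount masked to even values. A minimal scalar C++ sketch of the source-level shape (the function name and exact masking are illustrative, not taken from the commit; the tests apply the same pattern lane-wise to <4 x i32>):

```cpp
#include <cstdint>

// Rotate-left idiom with the amount masked to even values (0..30).
// A front end typically lowers this to the shl/lshr/or sequence with AND
// masks that the new tests check; ideally the backend matches it to a
// single rotate. Example: rotl_even(0x12345678u, 8) == 0x34567812u.
uint32_t rotl_even(uint32_t x, uint32_t n) {
  uint32_t amt = n & 30;                         // even rotate amount
  return (x << amt) | (x >> ((0u - amt) & 30));  // (-amt) & 30 == 32 - amt for amt != 0
}
```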
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-rotates.ll | 114 |
1 file changed, 114 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index e75973af05e..77371eed0a6 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -57,3 +57,117 @@ define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
   %6 = or <4 x i32> %4, %5
   ret <4 x i32> %6
 }
+
+define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $30, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm3, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %3 = and <4 x i32> %1, <i32 30, i32 30, i32 30, i32 30>
+  %4 = shl <4 x i32> %0, %3
+  %5 = sub nsw <4 x i32> zeroinitializer, %3
+  %6 = and <4 x i32> %5, <i32 30, i32 30, i32 30, i32 30>
+  %7 = lshr <4 x i32> %0, %6
+  %8 = or <4 x i32> %7, %4
+  ret <4 x i32> %8
+}
+
+define <4 x i32> @rotate_demanded_bits_2(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $23, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits_2:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %3 = and <4 x i32> %1, <i32 23, i32 23, i32 23, i32 23>
+  %4 = shl <4 x i32> %0, %3
+  %5 = sub nsw <4 x i32> zeroinitializer, %3
+  %6 = and <4 x i32> %5, <i32 31, i32 31, i32 31, i32 31>
+  %7 = lshr <4 x i32> %0, %6
+  %8 = or <4 x i32> %7, %4
+  ret <4 x i32> %8
+}
+
+define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: rotate_demanded_bits_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addb %sil, %sil
+; CHECK-NEXT: andb $30, %sil
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+; XOP-LABEL: rotate_demanded_bits_3:
+; XOP: # %bb.0:
+; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm3
+; XOP-NEXT: vpshld %xmm3, %xmm0, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm0, %xmm3, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: rotate_demanded_bits_3:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX512-NEXT: retq
+  %3 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %4 = and <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
+  %5 = shl <4 x i32> %0, %4
+  %6 = sub <4 x i32> zeroinitializer, %3
+  %7 = and <4 x i32> %6, <i32 30, i32 30, i32 30, i32 30>
+  %8 = lshr <4 x i32> %0, %7
+  %9 = or <4 x i32> %5, %8
+  ret <4 x i32> %9
+}
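A single rotate is a legal lowering for rotate_demanded_bits even though both shift amounts are masked with 30, because for even amounts the two masked values remain complementary modulo 32; the commit message attributes the missed match to SimplifyDemandedBits interfering with those AND masks. A small self-contained C++ check of the underlying identity (illustrative only, not part of the test file):

```cpp
#include <cassert>
#include <cstdint>

// Reference rotate-left for 32-bit values.
static uint32_t rotl(uint32_t x, uint32_t m) {
  return m ? (x << m) | (x >> (32 - m)) : x;
}

int main() {
  const uint32_t x = 0x9E3779B9u;  // arbitrary test value
  for (uint32_t n = 0; n < 64; ++n) {
    uint32_t m = n & 30;  // even amount, as in rotate_demanded_bits
    // The masked shift pair from the test IR equals a rotate-left by m,
    // since (0 - m) & 30 is 32 - m for m != 0 and 0 for m == 0.
    uint32_t shifted = (x << m) | (x >> ((0u - m) & 30));
    assert(shifted == rotl(x, m));
  }
  return 0;
}
```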

