| author | Craig Topper <craig.topper@intel.com> | 2017-11-13 17:53:59 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-13 17:53:59 +0000 |
| commit | c314f461ddbb45b21b8f1f3c5c58f4eb367e8b8d (patch) | |
| tree | f2464aed8507c1045312b3b8fba5bd6712a0ce21 /llvm/test/CodeGen/X86/masked_gather_scatter.ll | |
| parent | f902e467b75671d06d1e9ad3663b8aa9366bc14b (diff) | |
[X86] Allow X86ISD::Wrapper to be folded into the base of gather/scatter address
If the base of our gather corresponds to something contained in an X86ISD::Wrapper, we should be able to fold it into the address.
This patch refactors some of the address matching to more fully use the X86ISelAddressMode struct and the getAddressOperands helper. A new helper function matchVectorAddress is added to call matchWrapper or fall back to matchAddressBase.
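For readers who don't want to open the C++ side of the patch, here is a minimal sketch of what the new helper looks like (reconstructed for illustration; the authoritative version is in llvm/lib/Target/X86/X86ISelDAGToDAG.cpp in this commit, and details such as comments may differ). Like the other match* helpers in that file, it returns false on success and true on failure:

```cpp
// Try to match a vector gather/scatter base address into AM. If the base is
// an X86ISD::Wrapper (a wrapped global, constant pool entry, etc.), attempt
// to fold the wrapped symbol into the addressing mode; otherwise fall back
// to treating the value as a plain base register.
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  // TODO: Support other operations (e.g. constant offsets from a wrapper).
  switch (N.getOpcode()) {
  case X86ISD::Wrapper:
    if (!matchWrapper(N, AM))
      return false; // The wrapped symbol was folded into AM.
    break;
  }

  return matchAddressBase(N, AM);
}
```

The effect is visible in the test diff below: with the small code model the gather's base can become a direct symbol reference (`vpgatherqd glob_array(,%zmm0,4)`), while with `-code-model=large` the symbol still has to be materialized with `movabsq` first.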
We should also be able to support constant offsets from a wrapper, but I'll look into that in a future patch. We may even be able to completely reuse matchAddress here, but I wanted to start simple and work up to it.
Differential Revision: https://reviews.llvm.org/D39927
llvm-svn: 318057
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_gather_scatter.ll')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 113 |

1 file changed, 75 insertions, 38 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 7cd698e3760..70472c49f57 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
 ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
 ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
 ; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
 ; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
@@ -491,18 +492,34 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT: retl
 ;
-; SKX-LABEL: test9:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test9:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test9:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
 ;
 ; SKX_32-LABEL: test9:
 ; SKX_32: # BB#0: # %entry
@@ -560,18 +577,34 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT: retl
 ;
-; SKX-LABEL: test10:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test10:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test10:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
 ;
 ; SKX_32-LABEL: test10:
 ; SKX_32: # BB#0: # %entry
@@ -2330,33 +2363,37 @@ declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
 define <8 x i32> @test_global_array(<8 x i64> %indxs) {
 ; KNL_64-LABEL: test_global_array:
 ; KNL_64: # BB#0:
-; KNL_64-NEXT: movl $glob_array, %eax
 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; KNL_64-NEXT: vmovdqa %ymm1, %ymm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test_global_array:
 ; KNL_32: # BB#0:
-; KNL_32-NEXT: movl $glob_array, %eax
 ; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
 ; KNL_32-NEXT: retl
 ;
-; SKX-LABEL: test_global_array:
-; SKX: # BB#0:
-; SKX-NEXT: movl $glob_array, %eax
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test_global_array:
+; SKX_SMALL: # BB#0:
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
+; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test_global_array:
+; SKX_LARGE: # BB#0:
+; SKX_LARGE-NEXT: movabsq $glob_array, %rax
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_LARGE-NEXT: retq
 ;
 ; SKX_32-LABEL: test_global_array:
 ; SKX_32: # BB#0:
-; SKX_32-NEXT: movl $glob_array, %eax
 ; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; SKX_32-NEXT: vmovdqa %ymm1, %ymm0
 ; SKX_32-NEXT: retl
 %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs
```

