author     Craig Topper <craig.topper@intel.com>  2017-11-13 17:53:59 +0000
committer  Craig Topper <craig.topper@intel.com>  2017-11-13 17:53:59 +0000
commit     c314f461ddbb45b21b8f1f3c5c58f4eb367e8b8d (patch)
tree       f2464aed8507c1045312b3b8fba5bd6712a0ce21 /llvm/test/CodeGen/X86/masked_gather_scatter.ll
parent     f902e467b75671d06d1e9ad3663b8aa9366bc14b (diff)
[X86] Allow X86ISD::Wrapper to be folded into the base of gather/scatter address
If the base of our gather corresponds to something contained in X86ISD::Wrapper we should be able to fold it into the address.

This patch refactors some of the address matching to more fully use the X86ISelAddressMode struct and the getAddressOperands helper. A new helper function matchVectorAddress is added to call matchWrapper or fall back to matchAddressBase.

We should also be able to support constant offsets from a wrapper, but I'll look into that in a future patch. We may even be able to completely reuse matchAddress here, but I wanted to start simple and work up to it.

Differential Revision: https://reviews.llvm.org/D39927

llvm-svn: 318057
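As a reading aid, here is a minimal sketch of what the new matchVectorAddress helper could look like, assuming it follows the usual return-true-on-failure convention of the other match* helpers in X86ISelDAGToDAG.cpp (this is reconstructed from the description above; the committed patch is authoritative):

bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  // If the base is an X86ISD::Wrapper (a wrapped global, constant-pool
  // entry, etc.), try to fold the wrapped symbol into the addressing mode.
  if (N.getOpcode() == X86ISD::Wrapper)
    if (!matchWrapper(N, AM))
      return false; // Wrapper folded; the address is fully matched.

  // Otherwise fall back to treating the whole value as the base.
  return matchAddressBase(N, AM);
}

With this in place, a gather whose base is a wrapped global no longer needs the address materialized into a scalar register first, which is what the test_global_array hunk below shows: the movl $glob_array, %eax / (%rax,%zmm0,4) pair collapses to a direct glob_array(,%zmm0,4) operand, except under the large code model, where the 64-bit absolute address still has to be loaded with movabsq.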
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_gather_scatter.ll')
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll  113
1 file changed, 75 insertions(+), 38 deletions(-)
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 7cd698e3760..70472c49f57 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
@@ -491,18 +492,34 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test9:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test9:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test9:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test9:
; SKX_32: # BB#0: # %entry
@@ -560,18 +577,34 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test10:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test10:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test10:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test10:
; SKX_32: # BB#0: # %entry
@@ -2330,33 +2363,37 @@ declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <
define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64: # BB#0:
-; KNL_64-NEXT: movl $glob_array, %eax
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT: vmovdqa %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32: # BB#0:
-; KNL_32-NEXT: movl $glob_array, %eax
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test_global_array:
-; SKX: # BB#0:
-; SKX-NEXT: movl $glob_array, %eax
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test_global_array:
+; SKX_SMALL: # BB#0:
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
+; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test_global_array:
+; SKX_LARGE: # BB#0:
+; SKX_LARGE-NEXT: movabsq $glob_array, %rax
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32: # BB#0:
-; SKX_32-NEXT: movl $glob_array, %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT: vmovdqa %ymm1, %ymm0
; SKX_32-NEXT: retl
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs