summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
author: Igor Breger <igor.breger@intel.com> 2016-01-25 10:17:11 +0000
committer: Igor Breger <igor.breger@intel.com> 2016-01-25 10:17:11 +0000
commit: 6d421419db33a98c52a05a2aefd74492473d0512 (patch)
tree: b50bc5f7f492b410e42b20659e75768ec53f0047 /llvm/test/Transforms/LoopVectorize
parent: d3e1dede4a28dfffedc8b49177c2abb79a0649e7 (diff)
download: bcm5719-llvm-6d421419db33a98c52a05a2aefd74492473d0512.tar.gz
download: bcm5719-llvm-6d421419db33a98c52a05a2aefd74492473d0512.zip
AVX1 : Enable vector masked_load/store to AVX1.
Use AVX1 FP instructions (vmaskmovps/pd) in place of the AVX2 int instructions (vpmaskmovd/q).

Differential Revision: http://reviews.llvm.org/D16528

llvm-svn: 258675
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll 58
1 files changed, 28 insertions, 30 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index abe7d6de3f3..f810d17e893 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -1,9 +1,7 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-;AVX1-NOT: llvm.masked
-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"
@@ -18,12 +16,12 @@ target triple = "x86_64-pc_linux"
; }
;}
-;AVX2-LABEL: @foo1
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x i32> @llvm.masked.load.v8i32
-;AVX2: add nsw <8 x i32>
-;AVX2: call void @llvm.masked.store.v8i32
-;AVX2: ret void
+;AVX-LABEL: @foo1
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x i32> @llvm.masked.load.v8i32
+;AVX: add nsw <8 x i32>
+;AVX: call void @llvm.masked.store.v8i32
+;AVX: ret void
;AVX512-LABEL: @foo1
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@@ -102,12 +100,12 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo2
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x float> @llvm.masked.load.v8f32
-;AVX2: fadd <8 x float>
-;AVX2: call void @llvm.masked.store.v8f32
-;AVX2: ret void
+;AVX-LABEL: @foo2
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x float> @llvm.masked.load.v8f32
+;AVX: fadd <8 x float>
+;AVX: call void @llvm.masked.store.v8f32
+;AVX: ret void
;AVX512-LABEL: @foo2
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@@ -187,13 +185,13 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo3
-;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
-;AVX2: call <4 x double> @llvm.masked.load.v4f64
-;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
-;AVX2: fadd <4 x double>
-;AVX2: call void @llvm.masked.store.v4f64
-;AVX2: ret void
+;AVX-LABEL: @foo3
+;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
+;AVX: call <4 x double> @llvm.masked.load.v4f64
+;AVX: sitofp <4 x i32> %wide.load to <4 x double>
+;AVX: fadd <4 x double>
+;AVX: call void @llvm.masked.store.v4f64
+;AVX: ret void
;AVX512-LABEL: @foo3
;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
@@ -275,9 +273,9 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo4
-;AVX2-NOT: llvm.masked
-;AVX2: ret void
+;AVX-LABEL: @foo4
+;AVX-NOT: llvm.masked
+;AVX: ret void
;AVX512-LABEL: @foo4
;AVX512-NOT: llvm.masked
@@ -349,10 +347,10 @@ for.end: ; preds = %for.cond
; The loop here should not be vectorized due to trapping
; constant expression
-;AVX2-LABEL: @foo5
-;AVX2-NOT: llvm.masked
-;AVX2: store i32 sdiv
-;AVX2: ret void
+;AVX-LABEL: @foo5
+;AVX-NOT: llvm.masked
+;AVX: store i32 sdiv
+;AVX: ret void
;AVX512-LABEL: @foo5
;AVX512-NOT: llvm.masked
OpenPOWER on IntegriCloud