summary refs log tree commit diff stats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/masked_gather_scatter.ll 10
-rw-r--r-- llvm/test/CodeGen/X86/masked_memop.ll 25
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll 142
3 files changed, 177 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index b65146ed417..19d2c891f85 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -330,3 +330,13 @@ define <3 x i32> @test16(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
ret <3 x i32>%res
}
+declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
+
+; KNL-LABEL: test17
+; KNL: vpgatherqq
+; KNL: vpgatherqq
+define <16 x float*> @test17(<16 x float**> %ptrs) { ; gathers 16 float* values through a vector of 16 float** addresses and returns them
+
+ %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef) ; i32 4 is the alignment operand; the mask is a constant all-true vector and the pass-through is undef
+ ret <16 x float*>%res
+}
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 84ebdf1ef18..a720054c167 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -300,3 +300,28 @@ declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
+
+; AVX512-LABEL: test23
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+
+define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) { ; masked load of a 16-element vector of i32* from %addr
+ %mask = icmp eq <16 x i32*> %trigger, zeroinitializer ; a mask lane is true where the corresponding %trigger pointer is null
+ %res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer) ; i32 4 is the alignment operand; zeroinitializer is the pass-through value for masked-off lanes
+ ret <16 x i32*> %res
+}
+
+%mystruct = type { i16, i16, [1 x i8*] }
+
+declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
+
+; AVX512-LABEL: test24
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512: kshiftrw $8, %k1, %k1
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
+
+define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) { ; masked load of 16 pointers to the named struct type; the mask arrives directly as an argument
+ %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer) ; i32 4 is the alignment operand; zeroinitializer is the pass-through value for masked-off lanes
+ ret <16 x %mystruct*> %res
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 8c375ccfd31..abe7d6de3f3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -499,4 +499,146 @@ for.end: ; preds = %for.cond
ret void
}
+; void foo7 (double * __restrict__ out, double ** __restrict__ in,
+; bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+; }
+
+;AVX512-LABEL: @foo7
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 { ; alloca-based IR for the foo7 C loop quoted in the comment block above
+entry:
+ %out.addr = alloca double*, align 8 ; stack slot for each of the four arguments ...
+ %in.addr = alloca double**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4 ; ... and one for the loop counter i
+ store double* %out, double** %out.addr, align 8
+ store double** %in, double*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4 ; i = 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1 ; unsigned compare: i < size
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64 ; zero-extend i for use as a 64-bit index
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1 ; trigger[i]
+ %tobool = trunc i8 %4 to i1 ; only the low bit of trigger[i] decides the branch
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load double**, double*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
+ %7 = load double*, double** %arrayidx2, align 8 ; in[i], a pointer-typed element loaded only when trigger[i] is set
+ %cmp3 = icmp ne double* %7, null ; in[i] != 0
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8 ; out[i] = 0.5
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1 ; i + 1
+ store i32 %inc, i32* %i, align 4 ; write the incremented counter back to its stack slot
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+;typedef int (*fp)();
+;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+;}
+
+;AVX512-LABEL: @foo8
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 { ; alloca-based IR for the foo8 C loop quoted in the comment block above; %in points to function pointers of type i32 ()*
+entry:
+ %out.addr = alloca double*, align 8 ; stack slot for each of the four arguments ...
+ %in.addr = alloca i32 ()**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4 ; ... and one for the loop counter i
+ store double* %out, double** %out.addr, align 8
+ store i32 ()** %in, i32 ()*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4 ; i = 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1 ; unsigned compare: i < size
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64 ; zero-extend i for use as a 64-bit index
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1 ; trigger[i]
+ %tobool = trunc i8 %4 to i1 ; only the low bit of trigger[i] decides the branch
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load i32 ()**, i32 ()*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
+ %7 = load i32 ()*, i32 ()** %arrayidx2, align 8 ; in[i], a function-pointer element loaded only when trigger[i] is set
+ %cmp3 = icmp ne i32 ()* %7, null ; in[i] != 0
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8 ; out[i] = 0.5
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1 ; i + 1
+ store i32 %inc, i32* %i, align 4 ; write the incremented counter back to its stack slot
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
OpenPOWER on IntegriCloud