diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll | 25 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll | 142 |
3 files changed, 177 insertions, 0 deletions
# NOTE(review): this patch text was recovered from a page capture in which all
# line breaks and runs of whitespace had been collapsed. One-record-per-line
# layout and two-space IR indentation are restored below; no token was changed.
# Each @@ header counts more old-side lines than the context visible here, so
# presumably blank context lines were lost in the capture -- confirm against
# the upstream commit before trying to apply this.
#
# File 1 of 3: adds test17, which calls @llvm.masked.gather.v16p0f32 on a
# <16 x float**> operand with an all-true mask and an undef pass-through. The
# KNL check lines expect two vpgatherqq instructions.
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index b65146ed417..19d2c891f85 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -330,3 +330,13 @@ define <3 x i32> @test16(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
   ret <3 x i32>%res
 }
+declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
+
+; KNL-LABEL: test17
+; KNL: vpgatherqq
+; KNL: vpgatherqq
+define <16 x float*> @test17(<16 x float**> %ptrs) {
+
+  %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
+  ret <16 x float*>%res
+}
# File 2 of 3: adds two masked-load tests whose element type is a pointer.
#   test23 - loads <16 x i32*>; the mask is computed by comparing %trigger
#            against zeroinitializer. The AVX512 check lines expect two
#            zero-masked vmovdqu64 loads (offsets 64 and 0 from %rdi).
#   test24 - loads <16 x %mystruct*> with the mask passed in as an argument.
#            The AVX512 check lines expect a kshiftrw $8 between the two
#            vmovdqu64 loads.
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 84ebdf1ef18..a720054c167 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -300,3 +300,28 @@ declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>
 declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
 declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
+
+; AVX512-LABEL: test23
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+
+define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
+  %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
+  %res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
+  ret <16 x i32*> %res
+}
+
+%mystruct = type { i16, i16, [1 x i8*] }
+
+declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
+
+; AVX512-LABEL: test24
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512: kshiftrw $8, %k1, %k1
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
+
+define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
+  %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
+  ret <16 x %mystruct*> %res
+}
# File 3 of 3: adds two loop tests, foo7 and foo8. Both are scalar loops
# written with allocas and explicit loads/stores; each iteration stores 0.5
# to out[i] only when trigger[i] is set and in[i] is non-null.
#   foo7 - in[] holds double* elements; the AVX512 check lines expect a
#          @llvm.masked.load returning <8 x double*> followed by
#          @llvm.masked.store.v8f64.
#   foo8 - in[] holds i32 ()* (function pointer) elements; the AVX512 check
#          lines expect a @llvm.masked.load returning <8 x i32 ()*> followed
#          by @llvm.masked.store.v8f64.
# NOTE(review): "for.end: ; preds = %for.cond" is placed on the @@ line as the
# hunk's section heading, matching the other two hunks -- it could instead be
# the first context line; confirm against upstream.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 8c375ccfd31..abe7d6de3f3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -499,4 +499,146 @@ for.end: ; preds = %for.cond
   ret void
 }
+; void foo7 (double * __restrict__ out, double ** __restrict__ in,
+; bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+; }
+
+;AVX512-LABEL: @foo7
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+  %out.addr = alloca double*, align 8
+  %in.addr = alloca double**, align 8
+  %trigger.addr = alloca i8*, align 8
+  %size.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store double* %out, double** %out.addr, align 8
+  store double** %in, double*** %in.addr, align 8
+  store i8* %trigger, i8** %trigger.addr, align 8
+  store i32 %size, i32* %size.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %size.addr, align 4
+  %cmp = icmp ult i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %idxprom = zext i32 %2 to i64
+  %3 = load i8*, i8** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+  %4 = load i8, i8* %arrayidx, align 1
+  %tobool = trunc i8 %4 to i1
+  br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+  %5 = load i32, i32* %i, align 4
+  %idxprom1 = zext i32 %5 to i64
+  %6 = load double**, double*** %in.addr, align 8
+  %arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
+  %7 = load double*, double** %arrayidx2, align 8
+  %cmp3 = icmp ne double* %7, null
+  br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+  %8 = load i32, i32* %i, align 4
+  %idxprom4 = zext i32 %8 to i64
+  %9 = load double*, double** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+  store double 5.000000e-01, double* %arrayidx5, align 8
+  br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+  br label %for.inc
+
+for.inc: ; preds = %if.end
+  %10 = load i32, i32* %i, align 4
+  %inc = add i32 %10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end: ; preds = %for.cond
+  ret void
+}
+;typedef int (*fp)();
+;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+;}
+
+;AVX512-LABEL: @foo8
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+  %out.addr = alloca double*, align 8
+  %in.addr = alloca i32 ()**, align 8
+  %trigger.addr = alloca i8*, align 8
+  %size.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store double* %out, double** %out.addr, align 8
+  store i32 ()** %in, i32 ()*** %in.addr, align 8
+  store i8* %trigger, i8** %trigger.addr, align 8
+  store i32 %size, i32* %size.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %size.addr, align 4
+  %cmp = icmp ult i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %idxprom = zext i32 %2 to i64
+  %3 = load i8*, i8** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+  %4 = load i8, i8* %arrayidx, align 1
+  %tobool = trunc i8 %4 to i1
+  br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+  %5 = load i32, i32* %i, align 4
+  %idxprom1 = zext i32 %5 to i64
+  %6 = load i32 ()**, i32 ()*** %in.addr, align 8
+  %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
+  %7 = load i32 ()*, i32 ()** %arrayidx2, align 8
+  %cmp3 = icmp ne i32 ()* %7, null
+  br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+  %8 = load i32, i32* %i, align 4
+  %idxprom4 = zext i32 %8 to i64
+  %9 = load double*, double** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+  store double 5.000000e-01, double* %arrayidx5, align 8
+  br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+  br label %for.inc
+
+for.inc: ; preds = %if.end
+  %10 = load i32, i32* %i, align 4
+  %inc = add i32 %10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end: ; preds = %for.cond
+  ret void
+}
# NOTE(review): the lone '|' below is carried over unchanged from the capture;
# presumably table residue from the page, not part of the patch -- confirm.
|