diff options
author | Artur Pilipenko <apilipenko@azulsystems.com> | 2016-04-12 15:58:04 +0000 |
---|---|---|
committer | Artur Pilipenko <apilipenko@azulsystems.com> | 2016-04-12 15:58:04 +0000 |
commit | dbe0bc8df44354e261dec40fb663f7a5d42f093f (patch) | |
tree | 26807d8d45dac466fe80438aa159b7cacdf389a9 /llvm/test/Transforms | |
parent | 25570c5423137e6533b15b688d4501d6b771a1dc (diff) | |
download | bcm5719-llvm-dbe0bc8df44354e261dec40fb663f7a5d42f093f.tar.gz bcm5719-llvm-dbe0bc8df44354e261dec40fb663f7a5d42f093f.zip |
Support arbitrary addrspace pointers in masked load/store intrinsics
This is a resubmission of the 263158 change.
This patch fixes the problem which occurs when loop-vectorize tries to use @llvm.masked.load/store intrinsic for a non-default addrspace pointer. It fails with "Calling a function with a bad signature!" assertion in CallInst constructor because it tries to pass a non-default addrspace pointer to the pointer argument which has default addrspace.
The fix is to add pointer type as another overloaded type to @llvm.masked.load/store intrinsics.
Reviewed By: reames
Differential Revision: http://reviews.llvm.org/D17270
llvm-svn: 266086
Diffstat (limited to 'llvm/test/Transforms')
3 files changed, 117 insertions, 42 deletions
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index eb30b4a3ffc..0b15f7acebf 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -1,12 +1,12 @@ ; RUN: opt -instcombine -S < %s | FileCheck %s -declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) -declare void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask) +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) +declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask) declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru) declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) { - %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) ret <2 x double> %res ; CHECK-LABEL: @load_zeromask( @@ -14,7 +14,7 @@ define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) } define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) { - %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru) + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru) ret <2 x double> %res ; CHECK-LABEL: @load_onemask( @@ -23,7 +23,7 @@ define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) { } define void 
@store_zeromask(<2 x double>* %ptr, <2 x double> %val) { - call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> zeroinitializer) + call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> zeroinitializer) ret void ; CHECK-LABEL: @store_zeromask( @@ -31,7 +31,7 @@ define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) { } define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) { - call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>) + call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>) ret void ; CHECK-LABEL: @store_onemask( diff --git a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll index e98fd4527b3..736af173ad4 100644 --- a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll +++ b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll @@ -53,7 +53,7 @@ define <4 x float> @mload_one_one(i8* %f) { ; CHECK-LABEL: @mload_one_one( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>* -; CHECK-NEXT: %1 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef) +; CHECK-NEXT: %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef) ; CHECK-NEXT: ret <4 x float> %1 } @@ -65,7 +65,7 @@ define <2 x double> @mload_one_one_double(i8* %f) { ; CHECK-LABEL: @mload_one_one_double( ; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>* -; CHECK-NEXT: %1 = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef) +; CHECK-NEXT: %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef) ; 
CHECK-NEXT: ret <2 x double> %1 } @@ -77,7 +77,7 @@ define <8 x float> @mload_v8f32(i8* %f) { ; CHECK-LABEL: @mload_v8f32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>* -; CHECK-NEXT: %1 = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef) +; CHECK-NEXT: %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef) ; CHECK-NEXT: ret <8 x float> %1 } @@ -87,7 +87,7 @@ define <4 x double> @mload_v4f64(i8* %f) { ; CHECK-LABEL: @mload_v4f64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>* -; CHECK-NEXT: %1 = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef) +; CHECK-NEXT: %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef) ; CHECK-NEXT: ret <4 x double> %1 } @@ -99,7 +99,7 @@ define <4 x i32> @mload_v4i32(i8* %f) { ; CHECK-LABEL: @mload_v4i32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>* -; CHECK-NEXT: %1 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef) +; CHECK-NEXT: %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> %1 } @@ -109,7 +109,7 @@ define <2 x i64> @mload_v2i64(i8* %f) { ; CHECK-LABEL: @mload_v2i64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>* -; CHECK-NEXT: %1 = call <2 x i64> @llvm.masked.load.v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef) +; CHECK-NEXT: %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 
1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef) ; CHECK-NEXT: ret <2 x i64> %1 } @@ -119,7 +119,7 @@ define <8 x i32> @mload_v8i32(i8* %f) { ; CHECK-LABEL: @mload_v8i32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>* -; CHECK-NEXT: %1 = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef) +; CHECK-NEXT: %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef) ; CHECK-NEXT: ret <8 x i32> %1 } @@ -129,7 +129,7 @@ define <4 x i64> @mload_v4i64(i8* %f) { ; CHECK-LABEL: @mload_v4i64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>* -; CHECK-NEXT: %1 = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef) +; CHECK-NEXT: %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef) ; CHECK-NEXT: ret <4 x i64> %1 } @@ -187,7 +187,7 @@ define void @mstore_one_one(i8* %f, <4 x float> %v) { ; CHECK-LABEL: @mstore_one_one( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>* -; CHECK-NEXT: call void @llvm.masked.store.v4f32(<4 x float> %v, <4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>) ; CHECK-NEXT: ret void } @@ -199,7 +199,7 @@ define void @mstore_one_one_double(i8* %f, <2 x double> %v) { ; CHECK-LABEL: @mstore_one_one_double( ; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>* -; CHECK-NEXT: call void @llvm.masked.store.v2f64(<2 x double> %v, <2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>) +; CHECK-NEXT: call void 
@llvm.masked.store.v2f64.p0v2f64(<2 x double> %v, <2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>) ; CHECK-NEXT: ret void } @@ -211,7 +211,7 @@ define void @mstore_v8f32(i8* %f, <8 x float> %v) { ; CHECK-LABEL: @mstore_v8f32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>* -; CHECK-NEXT: call void @llvm.masked.store.v8f32(<8 x float> %v, <8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %v, <8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>) ; CHECK-NEXT: ret void } @@ -221,7 +221,7 @@ define void @mstore_v4f64(i8* %f, <4 x double> %v) { ; CHECK-LABEL: @mstore_v4f64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>* -; CHECK-NEXT: call void @llvm.masked.store.v4f64(<4 x double> %v, <4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>) +; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %v, <4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>) ; CHECK-NEXT: ret void } @@ -233,7 +233,7 @@ define void @mstore_v4i32(i8* %f, <4 x i32> %v) { ; CHECK-LABEL: @mstore_v4i32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>* -; CHECK-NEXT: call void @llvm.masked.store.v4i32(<4 x i32> %v, <4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>) ; CHECK-NEXT: ret void } @@ -243,7 +243,7 @@ define void @mstore_v2i64(i8* %f, <2 x i64> %v) { ; CHECK-LABEL: @mstore_v2i64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>* -; CHECK-NEXT: call void @llvm.masked.store.v2i64(<2 x i64> %v, <2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>) +; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %v, 
<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>) ; CHECK-NEXT: ret void } @@ -253,7 +253,7 @@ define void @mstore_v8i32(i8* %f, <8 x i32> %v) { ; CHECK-LABEL: @mstore_v8i32( ; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>* -; CHECK-NEXT: call void @llvm.masked.store.v8i32(<8 x i32> %v, <8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %v, <8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>) ; CHECK-NEXT: ret void } @@ -263,7 +263,7 @@ define void @mstore_v4i64(i8* %f, <4 x i64> %v) { ; CHECK-LABEL: @mstore_v4i64( ; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>* -; CHECK-NEXT: call void @llvm.masked.store.v4i64(<4 x i64> %v, <4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>) +; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %v, <4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>) ; CHECK-NEXT: ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 268fb61dba3..1227344daff 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -18,16 +18,16 @@ target triple = "x86_64-pc_linux" ;AVX-LABEL: @foo1 ;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100 -;AVX: call <8 x i32> @llvm.masked.load.v8i32 +;AVX: call <8 x i32> @llvm.masked.load.v8i32.p0v8i32 ;AVX: add nsw <8 x i32> -;AVX: call void @llvm.masked.store.v8i32 +;AVX: call void @llvm.masked.store.v8i32.p0v8i32 ;AVX: ret void ;AVX512-LABEL: @foo1 ;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100 -;AVX512: call <16 x i32> @llvm.masked.load.v16i32 +;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p0v16i32 ;AVX512: add nsw <16 x i32> 
-;AVX512: call void @llvm.masked.store.v16i32 +;AVX512: call void @llvm.masked.store.v16i32.p0v16i32 ;AVX512: ret void ; Function Attrs: nounwind uwtable @@ -89,6 +89,81 @@ for.end: ; preds = %for.cond ret void } +; The same as @foo1 but all the pointers are address space 1 pointers. + +;AVX-LABEL: @foo1_addrspace1 +;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100 +;AVX: call <8 x i32> @llvm.masked.load.v8i32.p1v8i32 +;AVX: add nsw <8 x i32> +;AVX: call void @llvm.masked.store.v8i32.p1v8i32 +;AVX: ret void + +;AVX512-LABEL: @foo1_addrspace1 +;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100 +;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p1v16i32 +;AVX512: add nsw <16 x i32> +;AVX512: call void @llvm.masked.store.v16i32.p1v16i32 +;AVX512: ret void + +; Function Attrs: nounwind uwtable +define void @foo1_addrspace1(i32 addrspace(1)* %A, i32 addrspace(1)* %B, i32 addrspace(1)* %trigger) { +entry: + %A.addr = alloca i32 addrspace(1)*, align 8 + %B.addr = alloca i32 addrspace(1)*, align 8 + %trigger.addr = alloca i32 addrspace(1)*, align 8 + %i = alloca i32, align 4 + store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 8 + store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 8 + store i32 addrspace(1)* %trigger, i32 addrspace(1)** %trigger.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 %idxprom + %3 = load i32, i32 addrspace(1)* %arrayidx, align 4 + %cmp1 = icmp slt i32 %3, 100 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %i, align 4 + %idxprom2 = sext i32 
%4 to i64 + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %5, i64 %idxprom2 + %6 = load i32, i32 addrspace(1)* %arrayidx3, align 4 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %8, i64 %idxprom4 + %9 = load i32, i32 addrspace(1)* %arrayidx5, align 4 + %add = add nsw i32 %6, %9 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %11 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %11, i64 %idxprom6 + store i32 %add, i32 addrspace(1)* %arrayidx7, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %12 = load i32, i32* %i, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + ; The source code: ; ;void foo2(float *A, float *B, int *trigger) { @@ -102,16 +177,16 @@ for.end: ; preds = %for.cond ;AVX-LABEL: @foo2 ;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100 -;AVX: call <8 x float> @llvm.masked.load.v8f32 +;AVX: call <8 x float> @llvm.masked.load.v8f32.p0v8f32 ;AVX: fadd <8 x float> -;AVX: call void @llvm.masked.store.v8f32 +;AVX: call void @llvm.masked.store.v8f32.p0v8f32 ;AVX: ret void ;AVX512-LABEL: @foo2 ;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100 -;AVX512: call <16 x float> @llvm.masked.load.v16f32 +;AVX512: call <16 x float> @llvm.masked.load.v16f32.p0v16f32 ;AVX512: fadd <16 x float> -;AVX512: call void @llvm.masked.store.v16f32 +;AVX512: call void @llvm.masked.store.v16f32.p0v16f32 ;AVX512: ret void ; Function Attrs: nounwind uwtable @@ -187,18 +262,18 @@ for.end: ; preds = %for.cond ;AVX-LABEL: @foo3 ;AVX: icmp slt <4 x i32> %wide.load, <i32 100, 
i32 100, -;AVX: call <4 x double> @llvm.masked.load.v4f64 +;AVX: call <4 x double> @llvm.masked.load.v4f64.p0v4f64 ;AVX: sitofp <4 x i32> %wide.load to <4 x double> ;AVX: fadd <4 x double> -;AVX: call void @llvm.masked.store.v4f64 +;AVX: call void @llvm.masked.store.v4f64.p0v4f64 ;AVX: ret void ;AVX512-LABEL: @foo3 ;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, -;AVX512: call <8 x double> @llvm.masked.load.v8f64 +;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64 ;AVX512: sitofp <8 x i32> %wide.load to <8 x double> ;AVX512: fadd <8 x double> -;AVX512: call void @llvm.masked.store.v8f64 +;AVX512: call void @llvm.masked.store.v8f64.p0v8f64 ;AVX512: ret void @@ -429,17 +504,17 @@ for.end: ; preds = %for.cond ;AVX2-LABEL: @foo6 ;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer ;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0> -;AVX2: call <4 x double> @llvm.masked.load.v4f64 +;AVX2: call <4 x double> @llvm.masked.load.v4f64.p0v4f64 ;AVX2: fadd <4 x double> -;AVX2: call void @llvm.masked.store.v4f64 +;AVX2: call void @llvm.masked.store.v4f64.p0v4f64 ;AVX2: ret void ;AVX512-LABEL: @foo6 ;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer ;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4 -;AVX512: call <8 x double> @llvm.masked.load.v8f64 +;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64 ;AVX512: fadd <8 x double> -;AVX512: call void @llvm.masked.store.v8f64 +;AVX512: call void @llvm.masked.store.v8f64.p0v8f64 ;AVX512: ret void @@ -507,8 +582,8 @@ for.end: ; preds = %for.cond ; } ;AVX512-LABEL: @foo7 -;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>* -;AVX512: call void @llvm.masked.store.v8f64 +;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* +;AVX512: call void @llvm.masked.store.v8f64.p0v8f64 ;AVX512: ret void define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 { @@ -579,8 +654,8 @@ for.end: ; 
preds = %for.cond ;} ;AVX512-LABEL: @foo8 -;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* % -;AVX512: call void @llvm.masked.store.v8f64 +;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* % +;AVX512: call void @llvm.masked.store.v8f64.p0v8f64 ;AVX512: ret void define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 { |