summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorJohannes Doerfert <jdoerfert@anl.gov>2019-01-26 23:40:35 +0000
committerJohannes Doerfert <jdoerfert@anl.gov>2019-01-26 23:40:35 +0000
commit00102c7d9579745851320bd4373db1ce2612b2c4 (patch)
tree5fcf5539fb0fba2fb16a6bf5f1b5402048f21d97 /llvm/test/Transforms/LoopVectorize
parent204bf2bbb265b5158783bacf9a67b0bc4cc57747 (diff)
downloadbcm5719-llvm-00102c7d9579745851320bd4373db1ce2612b2c4.tar.gz
bcm5719-llvm-00102c7d9579745851320bd4373db1ce2612b2c4.zip
[ValueTracking] Look through casts when determining non-nullness
Bitcast and certain Ptr2Int/Int2Ptr instructions will not alter the value of their operand and can therefore be looked through when we determine non-nullness. Differential Revision: https://reviews.llvm.org/D54956 llvm-svn: 352293
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll12
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll114
2 files changed, 63 insertions, 63 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 95be7eb76b3..21ed2f2fb39 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -47,7 +47,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_1]], zeroinitializer
; AVX512-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT]]
; AVX512-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP14]], i32 4, <16 x i1> [[TMP12]], <16 x i32> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP14]], i32 4, <16 x i1> [[TMP12]], <16 x i32> undef)
; AVX512-NEXT: [[TMP15:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_1]] to <16 x i64>
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP15]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP16]], i32 4, <16 x i1> [[TMP12]], <16 x float> undef)
@@ -62,7 +62,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_2]], zeroinitializer
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_1]]
; AVX512-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP24]], i32 4, <16 x i1> [[TMP22]], <16 x i32> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP24]], i32 4, <16 x i1> [[TMP22]], <16 x i32> undef)
; AVX512-NEXT: [[TMP25:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_2]] to <16 x i64>
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP25]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP26]], i32 4, <16 x i1> [[TMP22]], <16 x float> undef)
@@ -77,7 +77,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP32:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_3]], zeroinitializer
; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_2]]
; AVX512-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP34]], i32 4, <16 x i1> [[TMP32]], <16 x i32> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP34]], i32 4, <16 x i1> [[TMP32]], <16 x i32> undef)
; AVX512-NEXT: [[TMP35:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_3]] to <16 x i64>
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP35]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_3:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP36]], i32 4, <16 x i1> [[TMP32]], <16 x float> undef)
@@ -117,7 +117,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; FVW2-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD_1]], zeroinitializer
; FVW2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT]]
; FVW2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
-; FVW2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP14]], i32 4, <2 x i1> [[TMP12]], <2 x i32> undef)
+; FVW2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* nonnull [[TMP14]], i32 4, <2 x i1> [[TMP12]], <2 x i32> undef)
; FVW2-NEXT: [[TMP15:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD_1]] to <2 x i64>
; FVW2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[IN]], <2 x i64> [[TMP15]]
; FVW2-NEXT: [[WIDE_MASKED_GATHER_1:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP16]], i32 4, <2 x i1> [[TMP12]], <2 x float> undef)
@@ -132,7 +132,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; FVW2-NEXT: [[TMP22:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD_2]], zeroinitializer
; FVW2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_1]]
; FVW2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <2 x i32>*
-; FVW2-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP24]], i32 4, <2 x i1> [[TMP22]], <2 x i32> undef)
+; FVW2-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* nonnull [[TMP24]], i32 4, <2 x i1> [[TMP22]], <2 x i32> undef)
; FVW2-NEXT: [[TMP25:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD_2]] to <2 x i64>
; FVW2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[IN]], <2 x i64> [[TMP25]]
; FVW2-NEXT: [[WIDE_MASKED_GATHER_2:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP26]], i32 4, <2 x i1> [[TMP22]], <2 x float> undef)
@@ -147,7 +147,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; FVW2-NEXT: [[TMP32:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD_3]], zeroinitializer
; FVW2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_2]]
; FVW2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <2 x i32>*
-; FVW2-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP34]], i32 4, <2 x i1> [[TMP32]], <2 x i32> undef)
+; FVW2-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* nonnull [[TMP34]], i32 4, <2 x i1> [[TMP32]], <2 x i32> undef)
; FVW2-NEXT: [[TMP35:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD_3]] to <2 x i64>
; FVW2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[IN]], <2 x i64> [[TMP35]]
; FVW2-NEXT: [[WIDE_MASKED_GATHER_3:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP36]], i32 4, <2 x i1> [[TMP32]], <2 x float> undef)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 71038feec7b..cdb985952a7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -52,7 +52,7 @@ define void @foo1(i32* %A, i32* %B, i32* %trigger) {
; AVX1-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD_1]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX_NEXT]]
; AVX1-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <8 x i32>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP12]], i32 4, <8 x i1> [[TMP10]], <8 x i32> undef), !alias.scope !3
+; AVX1-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP12]], i32 4, <8 x i1> [[TMP10]], <8 x i32> undef), !alias.scope !3
; AVX1-NEXT: [[TMP13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD_1]], [[WIDE_LOAD_1]]
; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX_NEXT]]
; AVX1-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
@@ -129,13 +129,13 @@ define void @foo1(i32* %A, i32* %B, i32* %trigger) {
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP13]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 8
; AVX2-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP15]], i32 4, <8 x i1> [[TMP9]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP15]], i32 4, <8 x i1> [[TMP9]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 16
; AVX2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP17]], i32 4, <8 x i1> [[TMP10]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP17]], i32 4, <8 x i1> [[TMP10]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 24
; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP19]], i32 4, <8 x i1> [[TMP11]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP19]], i32 4, <8 x i1> [[TMP11]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT: [[TMP21:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD25]], [[WIDE_LOAD22]]
; AVX2-NEXT: [[TMP22:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD26]], [[WIDE_LOAD23]]
@@ -171,16 +171,16 @@ define void @foo1(i32* %A, i32* %B, i32* %trigger) {
; AVX2-NEXT: [[TMP43:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD24_1]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX_NEXT]]
; AVX2-NEXT: [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP45]], i32 4, <8 x i1> [[TMP40]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP45]], i32 4, <8 x i1> [[TMP40]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 8
; AVX2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD25_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP47]], i32 4, <8 x i1> [[TMP41]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD25_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP47]], i32 4, <8 x i1> [[TMP41]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 16
; AVX2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD26_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP49]], i32 4, <8 x i1> [[TMP42]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD26_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP49]], i32 4, <8 x i1> [[TMP42]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 24
; AVX2-NEXT: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <8 x i32>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP51]], i32 4, <8 x i1> [[TMP43]], <8 x i32> undef), !alias.scope !3
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27_1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull [[TMP51]], i32 4, <8 x i1> [[TMP43]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[TMP52:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD_1]], [[WIDE_LOAD_1]]
; AVX2-NEXT: [[TMP53:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD25_1]], [[WIDE_LOAD22_1]]
; AVX2-NEXT: [[TMP54:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD26_1]], [[WIDE_LOAD23_1]]
@@ -298,13 +298,13 @@ define void @foo1(i32* %A, i32* %B, i32* %trigger) {
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP13]], i32 4, <16 x i1> [[TMP8]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 16
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP15]], i32 4, <16 x i1> [[TMP9]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP15]], i32 4, <16 x i1> [[TMP9]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 32
; AVX512-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP17]], i32 4, <16 x i1> [[TMP10]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP17]], i32 4, <16 x i1> [[TMP10]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 48
; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP19]], i32 4, <16 x i1> [[TMP11]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP19]], i32 4, <16 x i1> [[TMP11]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT: [[TMP21:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD25]], [[WIDE_LOAD22]]
; AVX512-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD26]], [[WIDE_LOAD23]]
@@ -340,16 +340,16 @@ define void @foo1(i32* %A, i32* %B, i32* %trigger) {
; AVX512-NEXT: [[TMP43:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD24_1]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX_NEXT]]
; AVX512-NEXT: [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP45]], i32 4, <16 x i1> [[TMP40]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP45]], i32 4, <16 x i1> [[TMP40]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 16
; AVX512-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD25_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP47]], i32 4, <16 x i1> [[TMP41]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD25_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP47]], i32 4, <16 x i1> [[TMP41]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 32
; AVX512-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD26_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP49]], i32 4, <16 x i1> [[TMP42]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD26_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP49]], i32 4, <16 x i1> [[TMP42]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i64 48
; AVX512-NEXT: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <16 x i32>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP51]], i32 4, <16 x i1> [[TMP43]], <16 x i32> undef), !alias.scope !3
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP51]], i32 4, <16 x i1> [[TMP43]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[TMP52:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD_1]], [[WIDE_LOAD_1]]
; AVX512-NEXT: [[TMP53:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD25_1]], [[WIDE_LOAD22_1]]
; AVX512-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD26_1]], [[WIDE_LOAD23_1]]
@@ -1012,13 +1012,13 @@ define void @foo2(float* %A, float* %B, i32* %trigger) {
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP15]], i32 4, <8 x i1> [[TMP10]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 8
; AVX1-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <8 x float>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP17]], i32 4, <8 x i1> [[TMP11]], <8 x float> undef), !alias.scope !24
+; AVX1-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP17]], i32 4, <8 x i1> [[TMP11]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 16
; AVX1-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <8 x float>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP19]], i32 4, <8 x i1> [[TMP12]], <8 x float> undef), !alias.scope !24
+; AVX1-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP19]], i32 4, <8 x i1> [[TMP12]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 24
; AVX1-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <8 x float>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP21]], i32 4, <8 x i1> [[TMP13]], <8 x float> undef), !alias.scope !24
+; AVX1-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP21]], i32 4, <8 x i1> [[TMP13]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT: [[TMP22:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX1-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x float>
; AVX1-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x float>
@@ -1118,13 +1118,13 @@ define void @foo2(float* %A, float* %B, i32* %trigger) {
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP15]], i32 4, <8 x i1> [[TMP10]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 8
; AVX2-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <8 x float>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP17]], i32 4, <8 x i1> [[TMP11]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP17]], i32 4, <8 x i1> [[TMP11]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 16
; AVX2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <8 x float>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP19]], i32 4, <8 x i1> [[TMP12]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP19]], i32 4, <8 x i1> [[TMP12]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 24
; AVX2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <8 x float>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP21]], i32 4, <8 x i1> [[TMP13]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* nonnull [[TMP21]], i32 4, <8 x i1> [[TMP13]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[TMP22:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX2-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x float>
; AVX2-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x float>
@@ -1252,13 +1252,13 @@ define void @foo2(float* %A, float* %B, i32* %trigger) {
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP15]], i32 4, <16 x i1> [[TMP10]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 16
; AVX512-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP17]], i32 4, <16 x i1> [[TMP11]], <16 x float> undef), !alias.scope !24
+; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull [[TMP17]], i32 4, <16 x i1> [[TMP11]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 32
; AVX512-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP19]], i32 4, <16 x i1> [[TMP12]], <16 x float> undef), !alias.scope !24
+; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull [[TMP19]], i32 4, <16 x i1> [[TMP12]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 48
; AVX512-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <16 x float>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP21]], i32 4, <16 x i1> [[TMP13]], <16 x float> undef), !alias.scope !24
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull [[TMP21]], i32 4, <16 x i1> [[TMP13]], <16 x float> undef), !alias.scope !24
; AVX512-NEXT: [[TMP22:%.*]] = sitofp <16 x i32> [[WIDE_LOAD]] to <16 x float>
; AVX512-NEXT: [[TMP23:%.*]] = sitofp <16 x i32> [[WIDE_LOAD22]] to <16 x float>
; AVX512-NEXT: [[TMP24:%.*]] = sitofp <16 x i32> [[WIDE_LOAD23]] to <16 x float>
@@ -1457,13 +1457,13 @@ define void @foo3(double* %A, double* %B, i32* %trigger) #0 {
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[TMP10]], <4 x double> undef), !alias.scope !34
; AVX1-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 4
; AVX1-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP17]], i32 8, <4 x i1> [[TMP11]], <4 x double> undef), !alias.scope !34
+; AVX1-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP17]], i32 8, <4 x i1> [[TMP11]], <4 x double> undef), !alias.scope !34
; AVX1-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 8
; AVX1-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP19]], i32 8, <4 x i1> [[TMP12]], <4 x double> undef), !alias.scope !34
+; AVX1-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP19]], i32 8, <4 x i1> [[TMP12]], <4 x double> undef), !alias.scope !34
; AVX1-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 12
; AVX1-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP21]], i32 8, <4 x i1> [[TMP13]], <4 x double> undef), !alias.scope !34
+; AVX1-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP21]], i32 8, <4 x i1> [[TMP13]], <4 x double> undef), !alias.scope !34
; AVX1-NEXT: [[TMP22:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double>
; AVX1-NEXT: [[TMP23:%.*]] = sitofp <4 x i32> [[WIDE_LOAD22]] to <4 x double>
; AVX1-NEXT: [[TMP24:%.*]] = sitofp <4 x i32> [[WIDE_LOAD23]] to <4 x double>
@@ -1560,13 +1560,13 @@ define void @foo3(double* %A, double* %B, i32* %trigger) #0 {
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[TMP10]], <4 x double> undef), !alias.scope !34
; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 4
; AVX2-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP17]], i32 8, <4 x i1> [[TMP11]], <4 x double> undef), !alias.scope !34
+; AVX2-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP17]], i32 8, <4 x i1> [[TMP11]], <4 x double> undef), !alias.scope !34
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 8
; AVX2-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP19]], i32 8, <4 x i1> [[TMP12]], <4 x double> undef), !alias.scope !34
+; AVX2-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP19]], i32 8, <4 x i1> [[TMP12]], <4 x double> undef), !alias.scope !34
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 12
; AVX2-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP21]], i32 8, <4 x i1> [[TMP13]], <4 x double> undef), !alias.scope !34
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP21]], i32 8, <4 x i1> [[TMP13]], <4 x double> undef), !alias.scope !34
; AVX2-NEXT: [[TMP22:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double>
; AVX2-NEXT: [[TMP23:%.*]] = sitofp <4 x i32> [[WIDE_LOAD22]] to <4 x double>
; AVX2-NEXT: [[TMP24:%.*]] = sitofp <4 x i32> [[WIDE_LOAD23]] to <4 x double>
@@ -1691,13 +1691,13 @@ define void @foo3(double* %A, double* %B, i32* %trigger) #0 {
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP15]], i32 8, <8 x i1> [[TMP10]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 8
; AVX512-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP17]], i32 8, <8 x i1> [[TMP11]], <8 x double> undef), !alias.scope !34
+; AVX512-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP17]], i32 8, <8 x i1> [[TMP11]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 16
; AVX512-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP19]], i32 8, <8 x i1> [[TMP12]], <8 x double> undef), !alias.scope !34
+; AVX512-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP19]], i32 8, <8 x i1> [[TMP12]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 24
; AVX512-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP21]], i32 8, <8 x i1> [[TMP13]], <8 x double> undef), !alias.scope !34
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP21]], i32 8, <8 x i1> [[TMP13]], <8 x double> undef), !alias.scope !34
; AVX512-NEXT: [[TMP22:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double>
; AVX512-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x double>
; AVX512-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x double>
@@ -2453,22 +2453,22 @@ define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
; AVX1-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -3
; AVX1-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP14]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP20]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
+; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP20]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -4
; AVX1-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 -3
; AVX1-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP23]], i32 8, <4 x i1> [[REVERSE28]], <4 x double> undef), !alias.scope !44
+; AVX1-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP23]], i32 8, <4 x i1> [[REVERSE28]], <4 x double> undef), !alias.scope !44
; AVX1-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -8
; AVX1-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 -3
; AVX1-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP26]], i32 8, <4 x i1> [[REVERSE31]], <4 x double> undef), !alias.scope !44
+; AVX1-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP26]], i32 8, <4 x i1> [[REVERSE31]], <4 x double> undef), !alias.scope !44
; AVX1-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -12
; AVX1-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 -3
; AVX1-NEXT: [[REVERSE34:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>*
-; AVX1-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[REVERSE34]], <4 x double> undef), !alias.scope !44
+; AVX1-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP29]], i32 8, <4 x i1> [[REVERSE34]], <4 x double> undef), !alias.scope !44
; AVX1-NEXT: [[TMP30:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX1-NEXT: [[TMP31:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD29]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX1-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD32]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@@ -2571,22 +2571,22 @@ define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -3
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP14]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP20]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP20]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -4
; AVX2-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 -3
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP23]], i32 8, <4 x i1> [[REVERSE28]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP23]], i32 8, <4 x i1> [[REVERSE28]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -8
; AVX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 -3
; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP26]], i32 8, <4 x i1> [[REVERSE31]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP26]], i32 8, <4 x i1> [[REVERSE31]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -12
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 -3
; AVX2-NEXT: [[REVERSE34:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[REVERSE34]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull [[TMP29]], i32 8, <4 x i1> [[REVERSE34]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[TMP30:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX2-NEXT: [[TMP31:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD29]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX2-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD32]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@@ -2715,22 +2715,22 @@ define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -7
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP14]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP20]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !56
+; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP20]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !56
; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -8
; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 -7
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x i1> [[TMP15]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP23]], i32 8, <8 x i1> [[REVERSE28]], <8 x double> undef), !alias.scope !56
+; AVX512-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP23]], i32 8, <8 x i1> [[REVERSE28]], <8 x double> undef), !alias.scope !56
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -16
; AVX512-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 -7
; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP26]], i32 8, <8 x i1> [[REVERSE31]], <8 x double> undef), !alias.scope !56
+; AVX512-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP26]], i32 8, <8 x i1> [[REVERSE31]], <8 x double> undef), !alias.scope !56
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 -24
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 -7
; AVX512-NEXT: [[REVERSE34:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP29]], i32 8, <8 x i1> [[REVERSE34]], <8 x double> undef), !alias.scope !56
+; AVX512-NEXT: [[WIDE_MASKED_LOAD35:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull [[TMP29]], i32 8, <8 x i1> [[REVERSE34]], <8 x double> undef), !alias.scope !56
; AVX512-NEXT: [[TMP30:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX512-NEXT: [[TMP31:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD29]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX512-NEXT: [[TMP32:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD32]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@@ -2910,13 +2910,13 @@ define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigg
; AVX-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP13]], i32 8, <4 x i1> [[TMP8]], <4 x double*> undef)
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 4
; AVX-NEXT: [[TMP15:%.*]] = bitcast double** [[TMP14]] to <4 x double*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP15]], i32 8, <4 x i1> [[TMP9]], <4 x double*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* nonnull [[TMP15]], i32 8, <4 x i1> [[TMP9]], <4 x double*> undef)
; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 8
; AVX-NEXT: [[TMP17:%.*]] = bitcast double** [[TMP16]] to <4 x double*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP17]], i32 8, <4 x i1> [[TMP10]], <4 x double*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* nonnull [[TMP17]], i32 8, <4 x i1> [[TMP10]], <4 x double*> undef)
; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 12
; AVX-NEXT: [[TMP19:%.*]] = bitcast double** [[TMP18]] to <4 x double*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP19]], i32 8, <4 x i1> [[TMP11]], <4 x double*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* nonnull [[TMP19]], i32 8, <4 x i1> [[TMP11]], <4 x double*> undef)
; AVX-NEXT: [[TMP20:%.*]] = icmp ne <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX-NEXT: [[TMP21:%.*]] = icmp ne <4 x double*> [[WIDE_MASKED_LOAD13]], zeroinitializer
; AVX-NEXT: [[TMP22:%.*]] = icmp ne <4 x double*> [[WIDE_MASKED_LOAD14]], zeroinitializer
@@ -3003,13 +3003,13 @@ define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigg
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP13]], i32 8, <8 x i1> [[TMP8]], <8 x double*> undef)
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 8
; AVX512-NEXT: [[TMP15:%.*]] = bitcast double** [[TMP14]] to <8 x double*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP15]], i32 8, <8 x i1> [[TMP9]], <8 x double*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* nonnull [[TMP15]], i32 8, <8 x i1> [[TMP9]], <8 x double*> undef)
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 16
; AVX512-NEXT: [[TMP17:%.*]] = bitcast double** [[TMP16]] to <8 x double*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP17]], i32 8, <8 x i1> [[TMP10]], <8 x double*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* nonnull [[TMP17]], i32 8, <8 x i1> [[TMP10]], <8 x double*> undef)
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds double*, double** [[TMP12]], i64 24
; AVX512-NEXT: [[TMP19:%.*]] = bitcast double** [[TMP18]] to <8 x double*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP19]], i32 8, <8 x i1> [[TMP11]], <8 x double*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* nonnull [[TMP19]], i32 8, <8 x i1> [[TMP11]], <8 x double*> undef)
; AVX512-NEXT: [[TMP20:%.*]] = icmp ne <8 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp ne <8 x double*> [[WIDE_MASKED_LOAD13]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp ne <8 x double*> [[WIDE_MASKED_LOAD14]], zeroinitializer
@@ -3163,13 +3163,13 @@ define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigg
; AVX-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP13]], i32 8, <4 x i1> [[TMP8]], <4 x i32 ()*> undef)
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 4
; AVX-NEXT: [[TMP15:%.*]] = bitcast i32 ()** [[TMP14]] to <4 x i32 ()*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP15]], i32 8, <4 x i1> [[TMP9]], <4 x i32 ()*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* nonnull [[TMP15]], i32 8, <4 x i1> [[TMP9]], <4 x i32 ()*> undef)
; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 8
; AVX-NEXT: [[TMP17:%.*]] = bitcast i32 ()** [[TMP16]] to <4 x i32 ()*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP17]], i32 8, <4 x i1> [[TMP10]], <4 x i32 ()*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* nonnull [[TMP17]], i32 8, <4 x i1> [[TMP10]], <4 x i32 ()*> undef)
; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 12
; AVX-NEXT: [[TMP19:%.*]] = bitcast i32 ()** [[TMP18]] to <4 x i32 ()*>*
-; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP19]], i32 8, <4 x i1> [[TMP11]], <4 x i32 ()*> undef)
+; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* nonnull [[TMP19]], i32 8, <4 x i1> [[TMP11]], <4 x i32 ()*> undef)
; AVX-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX-NEXT: [[TMP21:%.*]] = icmp ne <4 x i32 ()*> [[WIDE_MASKED_LOAD13]], zeroinitializer
; AVX-NEXT: [[TMP22:%.*]] = icmp ne <4 x i32 ()*> [[WIDE_MASKED_LOAD14]], zeroinitializer
@@ -3256,13 +3256,13 @@ define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigg
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP13]], i32 8, <8 x i1> [[TMP8]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 8
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32 ()** [[TMP14]] to <8 x i32 ()*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP15]], i32 8, <8 x i1> [[TMP9]], <8 x i32 ()*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* nonnull [[TMP15]], i32 8, <8 x i1> [[TMP9]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 16
; AVX512-NEXT: [[TMP17:%.*]] = bitcast i32 ()** [[TMP16]] to <8 x i32 ()*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP17]], i32 8, <8 x i1> [[TMP10]], <8 x i32 ()*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* nonnull [[TMP17]], i32 8, <8 x i1> [[TMP10]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP12]], i64 24
; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32 ()** [[TMP18]] to <8 x i32 ()*>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP19]], i32 8, <8 x i1> [[TMP11]], <8 x i32 ()*> undef)
+; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* nonnull [[TMP19]], i32 8, <8 x i1> [[TMP11]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32 ()*> [[WIDE_MASKED_LOAD13]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp ne <8 x i32 ()*> [[WIDE_MASKED_LOAD14]], zeroinitializer
OpenPOWER on IntegriCloud