summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2019-11-19 18:37:21 +0000
committerDavid Green <david.green@arm.com>2019-11-19 18:37:30 +0000
commit882f23caeae5ad3ec1806eb6ec387e3611649d54 (patch)
tree6cc24e27da30a2b4d978d646898fb1c7954af166 /llvm/test/Transforms/LoopVectorize
parent411bfe476b758c09a0c9d4b3176e46f0a70de3bb (diff)
downloadbcm5719-llvm-882f23caeae5ad3ec1806eb6ec387e3611649d54.tar.gz
bcm5719-llvm-882f23caeae5ad3ec1806eb6ec387e3611649d54.zip
[ARM] MVE interleaving load and stores.
Now that we have the intrinsics, we can add VLD2/4 and VST2/4 lowering for MVE. This works the same way as Neon, recognising the load/shuffles combination and converting them into intrinsics in a pre-isel pass, which just calls getMaxSupportedInterleaveFactor, lowerInterleavedLoad and lowerInterleavedStore. The main difference to Neon is that we do not have a VLD3 instruction. Otherwise most of the code works very similarly, with just some minor differences in the form of the intrinsics to work around. VLD3 is disabled by making isLegalInterleavedAccessType return false for those cases. We may need some other future adjustments, such as VLD4 take up half the available registers so should maybe cost more. This patch should get the basics in though. Differential Revision: https://reviews.llvm.org/D69392
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll108
1 files changed, 54 insertions, 54 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
index c25071156a2..d94735f13cf 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
@@ -20,20 +20,20 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
; VF_4-LABEL: Checking a loop in "i8_factor_2"
-; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
+; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
; VF_8-LABEL: Checking a loop in "i8_factor_2"
-; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
+; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
; VF_16-LABEL: Checking a loop in "i8_factor_2"
-; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
+; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
@@ -61,20 +61,20 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
; VF_4-LABEL: Checking a loop in "i16_factor_2"
-; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
; VF_8-LABEL: Checking a loop in "i16_factor_2"
-; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
; VF_16-LABEL: Checking a loop in "i16_factor_2"
-; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
@@ -97,25 +97,25 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in "i32_factor_2"
-; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
; VF_4-LABEL: Checking a loop in "i32_factor_2"
-; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
; VF_8-LABEL: Checking a loop in "i32_factor_2"
-; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
; VF_16-LABEL: Checking a loop in "i32_factor_2"
-; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
@@ -189,15 +189,15 @@ entry:
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_8-LABEL: Checking a loop in "f16_factor_2"
-; VF_8: Found an estimated cost of 272 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_16-LABEL: Checking a loop in "f16_factor_2"
-; VF_16: Found an estimated cost of 1056 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 544 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 0
@@ -220,25 +220,25 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in "f32_factor_2"
-; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4
+; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_4-LABEL: Checking a loop in "f32_factor_2"
-; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4
+; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_8-LABEL: Checking a loop in "f32_factor_2"
-; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4
+; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, float* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_16-LABEL: Checking a loop in "f32_factor_2"
-; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 4
+; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, float* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 0
@@ -700,14 +700,14 @@ entry:
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1
; VF_16-LABEL: Checking a loop in "i8_factor_4"
-; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
+; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
+; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0
@@ -754,23 +754,23 @@ entry:
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2
; VF_8-LABEL: Checking a loop in "i16_factor_4"
-; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
+; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
; VF_16-LABEL: Checking a loop in "i16_factor_4"
-; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
+; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
+; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0
@@ -808,32 +808,32 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
; VF_4-LABEL: Checking a loop in "i32_factor_4"
-; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
; VF_8-LABEL: Checking a loop in "i32_factor_4"
-; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
+; VF_8: Found an estimated cost of 16 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
; VF_16-LABEL: Checking a loop in "i32_factor_4"
-; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
+; VF_16: Found an estimated cost of 32 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
+; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0
@@ -943,23 +943,23 @@ entry:
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2
; VF_8-LABEL: Checking a loop in "f16_factor_4"
-; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2
+; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2
; VF_16-LABEL: Checking a loop in "f16_factor_4"
-; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2
+; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2
+; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 0
@@ -997,32 +997,32 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4
; VF_4-LABEL: Checking a loop in "f32_factor_4"
-; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, float* %tmp3, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
; VF_8-LABEL: Checking a loop in "f32_factor_4"
-; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4
+; VF_8: Found an estimated cost of 16 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, float* %tmp3, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4
; VF_16-LABEL: Checking a loop in "f32_factor_4"
-; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4
+; VF_16: Found an estimated cost of 32 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, float* %tmp3, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4
+; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 0
OpenPOWER on IntegriCloud