diff options
author | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
commit | cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch) | |
tree | d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/LoopDataPrefetch | |
parent | c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff) | |
download | bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip |
Revert "Temporarily Revert "Add basic loop fusion pass.""
The earlier revert apparently deleted the test/Transforms directory; this commit restores it.
The revert will be re-applied afterwards.
llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/LoopDataPrefetch')
7 files changed, 307 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll new file mode 100644 index 00000000000..22cf6a9f6b0 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll @@ -0,0 +1,53 @@ +; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -max-prefetch-iters-ahead=1000 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -passes=loop-data-prefetch -max-prefetch-iters-ahead=1000 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -passes=loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" + +; ALL-LABEL: @small_stride( +define void @small_stride(double* nocapture %a, double* nocapture readonly %b) { +entry: + br label %for.body + +; ALL: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; ALL-NOT: call void @llvm.prefetch + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; ALL: for.end: +for.end: ; preds = %for.body + ret void +} + +; ALL-LABEL: @large_stride( +define void @large_stride(double* nocapture %a, double* 
nocapture readonly %b) { +entry: + br label %for.body + +; ALL: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; LARGE_PREFETCH: call void @llvm.prefetch +; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 150 + %exitcond = icmp eq i64 %indvars.iv.next, 160000 + br i1 %exitcond, label %for.end, label %for.body + +; ALL: for.end: +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll new file mode 100644 index 00000000000..fe956a83f56 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll @@ -0,0 +1,55 @@ +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch -max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -passes=loop-data-prefetch -S < %s | FileCheck %s 
--check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" + +; ALL-LABEL: @small_stride( +define void @small_stride(double* nocapture %a, double* nocapture readonly %b) { +entry: + br label %for.body + +; ALL: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; ALL-NOT: call void @llvm.prefetch + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; ALL: for.end: +for.end: ; preds = %for.body + ret void +} + +; ALL-LABEL: @large_stride( +define void @large_stride(double* nocapture %a, double* nocapture readonly %b) { +entry: + br label %for.body + +; ALL: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; LARGE_PREFETCH: call void @llvm.prefetch +; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 300 + %exitcond = icmp eq i64 %indvars.iv.next, 160000 + br i1 %exitcond, label %for.end, label %for.body + +; ALL: for.end: +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg 
b/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg new file mode 100644 index 00000000000..675f48e199a --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg @@ -0,0 +1,4 @@ +config.suffixes = ['.ll'] + +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll new file mode 100644 index 00000000000..6149119e061 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll @@ -0,0 +1,86 @@ +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \ +; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \ +; RUN: -pass-remarks-with-hotness \ +; RUN: < %s 2>&1 | FileCheck %s +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch \ +; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \ +; RUN: -pass-remarks-with-hotness \ +; RUN: < %s 2>&1 | FileCheck %s + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios5.0.0" + +; 1 struct MyStruct { +; 2 int field; +; 3 char kk[2044]; +; 4 } *my_struct; +; 5 +; 6 int f(struct MyStruct *p, int N) { +; 7 int total = 0; +; 8 for (int i = 0; i < N; i++) { +; 9 total += my_struct[i].field; +; 10 } +; 11 return total; +; 12 } + +; CHECK: remark: /tmp/s.c:9:27: prefetched memory access (hotness: 600) + +%struct.MyStruct = type { i32, [2044 x i8] } + +@my_struct = common global %struct.MyStruct* null, align 8 + +define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 !prof !21 { +entry: + %cmp6 = icmp sgt i32 %N, 0, !dbg !8 + br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9, !prof !22 + +for.body.lr.ph: ; preds = %entry + %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, 
!tbaa !11 + br label %for.body, !dbg !9 + +for.cond.cleanup: ; preds = %for.body, %entry + %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %total.0.lcssa, !dbg !15 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16 + %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17 + %add = add nsw i32 %1, %total.07, !dbg !20 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 + %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9, !prof !23 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0"} +!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 8, column: 21, scope: !6) +!9 = !DILocation(line: 8, column: 3, scope: !6) +!10 = !DILocation(line: 9, column: 14, scope: !6) +!11 = !{!12, !12, i64 0} +!12 = !{!"any pointer", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 11, column: 3, scope: !6) +!16 = !DILocation(line: 9, column: 27, scope: !6) +!17 = !{!18, !19, i64 0} +!18 = !{!"MyStruct", !19, i64 0, !13, i64 4} +!19 = !{!"int", !13, i64 0} 
+!20 = !DILocation(line: 9, column: 11, scope: !6) +!21 = !{!"function_entry_count", i64 6} +!22 = !{!"branch_weights", i32 99, i32 1} +!23 = !{!"branch_weights", i32 1, i32 99} diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll new file mode 100644 index 00000000000..e7d8f5a2ec4 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll @@ -0,0 +1,81 @@ +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \ +; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \ +; RUN: < %s 2>&1 | FileCheck %s +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch \ +; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \ +; RUN: < %s 2>&1 | FileCheck %s + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios5.0.0" + +; 1 struct MyStruct { +; 2 int field; +; 3 char kk[2044]; +; 4 } *my_struct; +; 5 +; 6 int f(struct MyStruct *p, int N) { +; 7 int total = 0; +; 8 for (int i = 0; i < N; i++) { +; 9 total += my_struct[i].field; +; 10 } +; 11 return total; +; 12 } + +; CHECK: remark: /tmp/s.c:9:27: prefetched memory access + +%struct.MyStruct = type { i32, [2044 x i8] } + +@my_struct = common global %struct.MyStruct* null, align 8 + +define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 { +entry: + %cmp6 = icmp sgt i32 %N, 0, !dbg !8 + br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9 + +for.body.lr.ph: ; preds = %entry + %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, !tbaa !11 + br label %for.body, !dbg !9 + +for.cond.cleanup: ; preds = %for.body, %entry + %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %total.0.lcssa, !dbg !15 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph 
], [ %indvars.iv.next, %for.body ] + %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16 + %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17 + %add = add nsw i32 %1, %total.07, !dbg !20 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 + %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0"} +!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 8, column: 21, scope: !6) +!9 = !DILocation(line: 8, column: 3, scope: !6) +!10 = !DILocation(line: 9, column: 14, scope: !6) +!11 = !{!12, !12, i64 0} +!12 = !{!"any pointer", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 11, column: 3, scope: !6) +!16 = !DILocation(line: 9, column: 27, scope: !6) +!17 = !{!18, !19, i64 0} +!18 = !{!"MyStruct", !19, i64 0, !13, i64 4} +!19 = !{!"int", !13, i64 0} +!20 = !DILocation(line: 9, column: 11, scope: !6) diff --git a/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll new file mode 100644 index 00000000000..ea46fd0d5a8 --- /dev/null +++ 
b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll @@ -0,0 +1,26 @@ +; RUN: opt -mcpu=a2 -loop-data-prefetch -S < %s | FileCheck %s +; RUN: opt -mcpu=a2 -passes=loop-data-prefetch -S < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-bgq-linux" + +define void @foo(double* nocapture %a, double* nocapture readonly %b) { +entry: + br label %for.body + +; CHECK: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; CHECK: call void @llvm.prefetch + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: for.end: +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..091332439b1 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True |