summary refs log tree commit diff stats
path: root/clang/test/OpenMP/taskloop_simd_codegen.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2016-04-28 12:14:51 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2016-04-28 12:14:51 +0000
commit1e73ef38827800c7d2de8ea3f75c093f956f50ad (patch)
treefe1eacf894816c614008b309dae99f92f7b693de /clang/test/OpenMP/taskloop_simd_codegen.cpp
parent5556a5cf3b743992d13a5a2429749e88d5e73c8d (diff)
downloadbcm5719-llvm-1e73ef38827800c7d2de8ea3f75c093f956f50ad.tar.gz
bcm5719-llvm-1e73ef38827800c7d2de8ea3f75c093f956f50ad.zip
[OPENMP 4.5] Initial codegen for 'taskloop simd' directive.
OpenMP 4.5 defines the 'taskloop simd' directive, which is a combined directive for the 'taskloop' and 'simd' directives. This patch adds initial codegen support for this directive and its two basic clauses, 'safelen' and 'simdlen'. llvm-svn: 267872
Diffstat (limited to 'clang/test/OpenMP/taskloop_simd_codegen.cpp')
-rw-r--r--clang/test/OpenMP/taskloop_simd_codegen.cpp211
1 files changed, 211 insertions, 0 deletions
diff --git a/clang/test/OpenMP/taskloop_simd_codegen.cpp b/clang/test/OpenMP/taskloop_simd_codegen.cpp
new file mode 100644
index 00000000000..e1f0130c0b4
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_simd_codegen.cpp
@@ -0,0 +1,211 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+// expected-no-diagnostics
+// REQUIRES: x86-registered-target
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) { // Three 'taskloop simd' loops: (1) no clauses, (2) nogroup + grainsize + simdlen, (3) if + shared + collapse + num_tasks + safelen.
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+#pragma omp taskloop simd
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+#pragma omp taskloop simd nogroup grainsize(argc) simdlen(4)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
+// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
+ int i; // Declared outside the directive; serves as the outer counter of the collapse(2) loop nest below.
+#pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32)
+ for (i = 0; i < argc; ++i)
+ for (int j = argc; j < argv[argc][argc]; ++j)
+ ;
+}
+
+// CHECK: define internal i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
+// CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
+// CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
+// CHECK: br label
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: br label %{{.*}}!llvm.loop
+// CHECK: ret i32 0
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S { // Runs a 'taskloop simd' loop from its constructor, using the member 'a' as the loop counter.
+ int a;
+ S(int c) { // Both simdlen and safelen are given below; OpenMP 4.5 requires simdlen <= safelen in that case.
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
+#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(64) safelen(128)
+ for (a = 0; a < c; ++a)
+ ;
+ }
+} s(1);
+
+// CHECK: define internal i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: store i32 %
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: load i32, i32* %
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: br label %{{.*}}!llvm.loop
+// CHECK: ret i32 0
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK: !{!"llvm.loop.vectorize.width", i32 4}
+// CHECK: !{!"llvm.loop.vectorize.width", i32 32}
+// CHECK: !{!"llvm.loop.vectorize.width", i32 64}
+
+#endif
OpenPOWER on IntegriCloud