diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2015-08-21 06:41:23 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-08-21 06:41:23 +0000 |
| commit | 84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9 (patch) | |
| tree | 87de1a47d363ad9cda798a6ba80d5bebb4342bcd /clang | |
| parent | 187155360e392995c92cc2e266a6cfec953595b7 (diff) | |
| download | bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.tar.gz bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.zip | |
[OPENMP 4.1] Improved codegen for 'uval' qualifier of 'linear' clause.
According to standard the 'uval' modifier declares the address of the original list item to have an invariant value for all iterations of the associated loop(s). Patch improves codegen for this qualifier by removing usage of the original reference variable and replacing by referenced l-value.
llvm-svn: 245674
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/lib/Sema/SemaOpenMP.cpp | 20 | ||||
| -rw-r--r-- | clang/test/OpenMP/simd_codegen.cpp | 129 |
2 files changed, 144 insertions, 5 deletions
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index d2653b7cb90..5d86cc23f00 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6423,7 +6423,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause( *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc()); // Build var to save initial value. VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start"); - AddInitializerToDecl(Init, DefaultLvalueConversion(DE).get(), + Expr *InitExpr; + if (LinKind == OMPC_LINEAR_uval) + InitExpr = VD->getInit(); + else + InitExpr = DE; + AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(), /*DirectInit*/ false, /*TypeMayContainAuto*/ false); auto InitRef = buildDeclRefExpr( *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc()); @@ -6491,15 +6496,20 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV, bool HasErrors = false; auto CurInit = Clause.inits().begin(); auto CurPrivate = Clause.privates().begin(); + auto LinKind = Clause.getModifier(); for (auto &RefExpr : Clause.varlists()) { Expr *InitExpr = *CurInit; // Build privatized reference to the current linear var. auto DE = cast<DeclRefExpr>(RefExpr); - auto CapturedRef = - buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()), - DE->getType().getUnqualifiedType(), DE->getExprLoc(), - /*RefersToCapture=*/true); + Expr *CapturedRef; + if (LinKind == OMPC_LINEAR_uval) + CapturedRef = cast<VarDecl>(DE->getDecl())->getInit(); + else + CapturedRef = + buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()), + DE->getType().getUnqualifiedType(), DE->getExprLoc(), + /*RefersToCapture=*/true); // Build update: Var = InitExpr + IV * Step ExprResult Update = diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index e67ad5d4c7e..bea46479274 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -481,6 +481,135 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: ret void } +// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}}) +void linear(float *a) { + // CHECK: [[VAL_ADDR:%.+]] = alloca i64, + // CHECK: [[K_ADDR:%.+]] = alloca i64*, + long long val = 0; + long long &k = val; + + #pragma omp simd linear(k : 3) +// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]], +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]] +// + + #pragma omp simd linear(val(k) : 3) +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]] +// + #pragma omp simd linear(uval(k) : 3) +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]] +// +} + // TERM_DEBUG-LABEL: bar int bar() {return 0;}; |

