author    Alexey Bataev <a.bataev@hotmail.com>  2015-04-22 11:59:37 +0000
committer Alexey Bataev <a.bataev@hotmail.com>  2015-04-22 11:59:37 +0000
commit    62dbb979c0bd3ab34d0b60b6fedfb66321d81dcf (patch)
tree      0508793fb5118827e38d856e37c22957cf3ffc41 /clang
parent    592cee666e17edd4402f355f36af51f41915efbe (diff)
download  bcm5719-llvm-62dbb979c0bd3ab34d0b60b6fedfb66321d81dcf.tar.gz
          bcm5719-llvm-62dbb979c0bd3ab34d0b60b6fedfb66321d81dcf.zip
[OPENMP] Fix use of unsigned counters in loops with zero trip count.
This patch fixes bugs in codegen for loops with unsigned counters and zero trip count. Previously, the precondition for every loop was built as (Upper - Lower) > 0. For a loop with zero trip count, Upper - Lower is negative only when the counter is a signed integer; for an unsigned counter the subtraction underflows, so the check incorrectly passes. This patch instead uses the original Lower < Upper comparison to verify that the loop body executes at least once. It also allows code generation to be skipped entirely for loops whose precondition is known to be always false.

Differential Revision: http://reviews.llvm.org/D9103

llvm-svn: 235500
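As a quick illustration of the underflow (a standalone sketch, not part of the patch):

#include <iostream>

// For a zero-trip-count loop such as `for (unsigned i = 100; i < 10; i += 10)`,
// Upper - Lower wraps around for unsigned operands, so the old precondition
// (Upper - Lower) > 0 is incorrectly true; the original Lower < Upper test
// is the correct guard.
int main() {
  unsigned Lower = 100, Upper = 10;
  std::cout << ((Upper - Lower) > 0) << '\n'; // prints 1: wraps to 4294967206
  std::cout << (Lower < Upper) << '\n';       // prints 0: zero trip count
}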
Diffstat (limited to 'clang')
-rw-r--r--  clang/lib/CodeGen/CGStmtOpenMP.cpp             141
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp                   33
-rw-r--r--  clang/test/OpenMP/for_codegen.cpp               25
-rw-r--r--  clang/test/OpenMP/for_firstprivate_codegen.cpp   4
-rw-r--r--  clang/test/OpenMP/for_lastprivate_codegen.cpp   25
-rw-r--r--  clang/test/OpenMP/parallel_for_codegen.cpp      22
-rw-r--r--  clang/test/OpenMP/simd_codegen.cpp              27
7 files changed, 159 insertions, 118 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 9451802ecef..c83dda255d6 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -680,6 +680,38 @@ static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
}
}
+static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
+ const Expr *Cond, llvm::BasicBlock *TrueBlock,
+ llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
+ CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+ EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
+ const VarDecl *IVDecl =
+ cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
+ bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
+ // Emit var without initialization.
+ auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
+ CGF.EmitAutoVarCleanups(VarEmission);
+ return VarEmission.getAllocatedAddress();
+ });
+ assert(IsRegistered && "counter already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ (void)PreCondScope.Privatize();
+ // Initialize internal counter to 0 to calculate initial values of real
+ // counters.
+ LValue IV = CGF.EmitLValue(S.getIterationVariable());
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::getNullValue(
+ IV.getAddress()->getType()->getPointerElementType()),
+ CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
+ // Get initial values of real counters.
+ for (auto I : S.updates()) {
+ CGF.EmitIgnoredExpr(I);
+ }
+ // Check that the loop is executed at least once.
+ CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
+}
+
static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
@@ -704,7 +736,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Pragma 'simd' code depends on presence of 'lastprivate'.
// If present, we have to separate last iteration of the loop:
//
- // if (LastIteration != 0) {
+ // if (PreCond) {
// for (IV in 0..LastIteration-1) BODY;
// BODY with updates of lastprivate vars;
// <Final counter/linear vars updates>;
@@ -712,10 +744,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
//
// otherwise (when there's no lastprivate):
//
+ // if (PreCond) {
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
+ // }
//
+ // Emit: if (PreCond) - begin.
+ // If the condition constant folds and can be elided, avoid emitting the
+ // whole loop.
+ bool CondConstant;
+ llvm::BasicBlock *ContBlock = nullptr;
+ if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+ if (!CondConstant)
+ return;
+ } else {
+ RegionCounter Cnt = CGF.getPGORegionCounter(&S);
+ auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+ ContBlock = CGF.createBasicBlock("simd.if.end");
+ emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+ CGF.EmitBlock(ThenBlock);
+ Cnt.beginRegion(CGF.Builder);
+ }
// Walk clauses and process safelen/lastprivate.
bool SeparateIter = false;
CGF.LoopStack.setParallel();
@@ -780,51 +830,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
}
}
- if (SeparateIter) {
- // Emit: if (LastIteration > 0) - begin.
- RegionCounter Cnt = CGF.getPGORegionCounter(&S);
- auto ThenBlock = CGF.createBasicBlock("simd.if.then");
- auto ContBlock = CGF.createBasicBlock("simd.if.end");
- CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
- Cnt.getCount());
- CGF.EmitBlock(ThenBlock);
- Cnt.beginRegion(CGF.Builder);
- // Emit 'then' code.
- {
- OMPPrivateScope LoopScope(CGF);
- EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
- EmitPrivateLinearVars(CGF, S, LoopScope);
- CGF.EmitOMPPrivateClause(S, LoopScope);
- (void)LoopScope.Privatize();
- CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
- S.getCond(/*SeparateIter=*/true), S.getInc(),
- [&S](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
- CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
+ {
+ OMPPrivateScope LoopScope(CGF);
+ EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+ EmitPrivateLinearVars(CGF, S, LoopScope);
+ CGF.EmitOMPPrivateClause(S, LoopScope);
+ (void)LoopScope.Privatize();
+ CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+ S.getCond(SeparateIter), S.getInc(),
+ [&S](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
+ if (SeparateIter) {
+ CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
}
- CGF.EmitOMPSimdFinal(S);
- // Emit: if (LastIteration != 0) - end.
+ }
+ CGF.EmitOMPSimdFinal(S);
+ // Emit: if (PreCond) - end.
+ if (ContBlock) {
CGF.EmitBranch(ContBlock);
CGF.EmitBlock(ContBlock, true);
- } else {
- {
- OMPPrivateScope LoopScope(CGF);
- EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
- EmitPrivateLinearVars(CGF, S, LoopScope);
- CGF.EmitOMPPrivateClause(S, LoopScope);
- (void)LoopScope.Privatize();
- CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
- S.getCond(/*SeparateIter=*/false), S.getInc(),
- [&S](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
- }
- CGF.EmitOMPSimdFinal(S);
}
};
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
@@ -1010,12 +1037,22 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Check pre-condition.
{
// Skip the entire loop if we don't meet the precondition.
- RegionCounter Cnt = getPGORegionCounter(&S);
- auto ThenBlock = createBasicBlock("omp.precond.then");
- auto ContBlock = createBasicBlock("omp.precond.end");
- EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
- EmitBlock(ThenBlock);
- Cnt.beginRegion(Builder);
+ // If the condition constant folds and can be elided, avoid emitting the
+ // whole loop.
+ bool CondConstant;
+ llvm::BasicBlock *ContBlock = nullptr;
+ if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+ if (!CondConstant)
+ return false;
+ } else {
+ RegionCounter Cnt = getPGORegionCounter(&S);
+ auto *ThenBlock = createBasicBlock("omp.precond.then");
+ ContBlock = createBasicBlock("omp.precond.end");
+ emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
+ Cnt.getCount());
+ EmitBlock(ThenBlock);
+ Cnt.beginRegion(Builder);
+ }
// Emit 'then' code.
{
// Emit helper vars inits.
@@ -1090,8 +1127,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
}
// We're now done with the loop, so jump to the continuation block.
- EmitBranch(ContBlock);
- EmitBlock(ContBlock, true);
+ if (ContBlock) {
+ EmitBranch(ContBlock);
+ EmitBlock(ContBlock, true);
+ }
}
return HasLastprivateClause;
}
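In source-level terms, the codegen change above amounts to the guard structure below (a pseudo-source sketch of the emitted control flow, not code from the patch; the names mirror the omp.precond.* basic-block labels in the hunks):

// Sketch of what EmitOMPWorksharingLoop / EmitOMPSimdDirective now emit,
// assuming a single counter as in the for_codegen.cpp test below.
void worksharing_loop_sketch(char a) {
  // emitPreCond: privatize the counters and the iteration variable, zero the
  // internal IV, then run S.updates() to compute the counters' start values.
  int omp_iv = 0;
  char i = (char)(a + omp_iv * 1); // matches the mul/add/trunc CHECK lines
  if ((int)i < 10) {               // omp.precond.then: Lower < Upper guard
    // ... __kmpc_for_static_init_4, the loop proper, __kmpc_for_static_fini
  }
  // omp.precond.end; when the guard constant-folds to false, codegen for the
  // entire loop is skipped instead of emitting a dead branch.
}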
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index fed0ac77b84..0cfb94eb482 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1977,6 +1977,8 @@ public:
bool ShouldSubtractStep() const { return SubtractStep; }
/// \brief Build the expression to calculate the number of iterations.
Expr *BuildNumIterations(Scope *S, const bool LimitedType) const;
+ /// \brief Build the precondition expression for the loops.
+ Expr *BuildPreCond(Scope *S, Expr *Cond) const;
/// \brief Build reference expression to the counter to be used for codegen.
Expr *BuildCounterVar() const;
/// \brief Build initialization of the counter to be used for codegen.
@@ -2380,6 +2382,19 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
return Diff.get();
}
+Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
+ // Try to build LB <op> UB, where <op> is <, >, <=, or >=.
+ bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
+ SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+ auto CondExpr = SemaRef.BuildBinOp(
+ S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE)
+ : (TestIsStrictOp ? BO_GT : BO_GE),
+ LB, UB);
+ SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress);
+ // Otherwise use the original loop condition and evaluate it at runtime.
+ return CondExpr.isUsable() ? CondExpr.get() : Cond;
+}
+
/// \brief Build reference expression to the counter to be used for codegen.
Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
@@ -2395,6 +2410,8 @@ Expr *OpenMPIterationSpaceChecker::BuildCounterStep() const { return Step; }
/// \brief Iteration space of a single for loop.
struct LoopIterationSpace {
+ /// \brief Condition of the loop.
+ Expr *PreCond;
/// \brief This expression calculates the number of iterations in the loop.
/// It is always possible to calculate it before starting the loop.
Expr *NumIterations;
@@ -2535,6 +2552,7 @@ static bool CheckOpenMPIterationSpace(
return HasErrors;
// Build the loop's iteration space representation.
+ ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond());
ResultIterSpace.NumIterations = ISC.BuildNumIterations(
DSA.getCurScope(), /* LimitedType */ isOpenMPWorksharingDirective(DKind));
ResultIterSpace.CounterVar = ISC.BuildCounterVar();
@@ -2545,7 +2563,8 @@ static bool CheckOpenMPIterationSpace(
ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange();
ResultIterSpace.Subtract = ISC.ShouldSubtractStep();
- HasErrors |= (ResultIterSpace.NumIterations == nullptr ||
+ HasErrors |= (ResultIterSpace.PreCond == nullptr ||
+ ResultIterSpace.NumIterations == nullptr ||
ResultIterSpace.CounterVar == nullptr ||
ResultIterSpace.CounterInit == nullptr ||
ResultIterSpace.CounterStep == nullptr);
@@ -2690,6 +2709,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
// Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are
// the iteration counts of the collapsed for loops.
+ // Precondition tests if there is at least one iteration (all conditions are
+ // true).
+ auto PreCond = ExprResult(IterSpaces[0].PreCond);
auto N0 = IterSpaces[0].NumIterations;
ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef);
ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef);
@@ -2702,6 +2724,10 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
Scope *CurScope = DSA.getCurScope();
for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) {
+ if (PreCond.isUsable()) {
+ PreCond = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LAnd,
+ PreCond.get(), IterSpaces[Cnt].PreCond);
+ }
auto N = IterSpaces[Cnt].NumIterations;
AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32;
if (LastIteration32.isUsable())
@@ -2763,11 +2789,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
- // Precondition tests if there is at least one iteration (LastIteration > 0).
- ExprResult PreCond = SemaRef.BuildBinOp(
- CurScope, InitLoc, BO_GT, LastIteration.get(),
- SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
-
QualType VType = LastIteration.get()->getType();
// Build variables passed into runtime, necessary for worksharing directives.
ExprResult LB, UB, IL, ST, EUB;
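On the Sema side, the net effect is easiest to see at the source level (an illustration of the expressions BuildPreCond and CheckOpenMPLoop now build, not code from the patch):

// BuildPreCond emits LB <op> UB per loop using the loop's own comparison,
// and CheckOpenMPLoop folds the per-loop conditions together with &&.
void collapsed(int n, int m) {
#pragma omp for collapse(2)
  for (int i = 0; i < n; ++i)   // contributes: 0 < n
    for (int j = m; j > 0; --j) // contributes: m > 0
      ;
}
// The nest is guarded by roughly `if (0 < n && m > 0)`, replacing the old
// `LastIteration > 0` test that unsigned wraparound in the iteration-count
// computation could make incorrectly true.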
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index a53af80ac64..85f1ba69b27 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -315,6 +315,31 @@ void runtime(float *a, float *b, float *c, float *d) {
// CHECK: ret void
}
+// CHECK-LABEL: test_precond
+void test_precond() {
+ // CHECK: [[A_ADDR:%.+]] = alloca i8,
+ // CHECK: [[I_ADDR:%.+]] = alloca i8,
+ char a = 0;
+ // CHECK: store i32 0, i32* [[IV_ADDR:%.+]],
+ // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+ // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+ // CHECK: [[IV:%.+]] = load i32, i32* [[IV_ADDR]],
+ // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+ // CHECK: [[ADD:%.+]] = add nsw i32 [[CONV]], [[MUL]]
+ // CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+ // CHECK: store i8 [[CONV]], i8* [[I_ADDR]],
+ // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+ // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+ // CHECK: [[CMP:%.+]] = icmp slt i32 [[CONV]], 10
+ // CHECK: br i1 [[CMP]], label %[[PRECOND_THEN:[^,]+]], label %[[PRECOND_END:[^,]+]]
+ // CHECK: [[PRECOND_THEN]]
+ // CHECK: call void @__kmpc_for_static_init_4
+#pragma omp for
+ for(char i = a; i < 10; ++i);
+ // CHECK: call void @__kmpc_for_static_fini
+ // CHECK: [[PRECOND_END]]
+}
+
// TERM_DEBUG-LABEL: foo
int foo() {return 0;};
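The new test_precond CHECK lines above can be read as verifying this source-level computation (an illustrative reading, not additional test content):

char a = 0;
// The counter's start value is rebuilt from IV = 0, then compared in the
// promoted type: sext i8 -> i32 followed by icmp slt i32 ..., 10.
bool precond = (int)a < 10; // true, so the __kmpc_for_static_init_4 path runs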
diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp
index b2fe730b6ca..0baec9c1aa5 100644
--- a/clang/test/OpenMP/for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp
@@ -40,7 +40,7 @@ T tmain() {
S<T> var(3);
#pragma omp parallel
#pragma omp for firstprivate(t_var, vec, s_arr, var)
- for (int i = 0; i < 0; ++i) {
+ for (int i = 0; i < 2; ++i) {
vec[i] = t_var;
s_arr[i] = var;
}
@@ -146,7 +146,7 @@ int main() {
return 0;
#else
#pragma omp for firstprivate(t_var, vec, s_arr, var)
- for (int i = 0; i < 0; ++i) {
+ for (int i = 0; i < 2; ++i) {
vec[i] = t_var;
s_arr[i] = var;
}
diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp
index b9e23cee7a7..7d3337a2553 100644
--- a/clang/test/OpenMP/for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp
@@ -67,12 +67,14 @@ int main() {
// LAMBDA: alloca i{{[0-9]+}},
// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
// LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
- // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+ // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+ // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+ // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
// LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
- // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+ // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
g = 1;
// Check for final copying of private values back to original vars.
// LAMBDA: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -86,8 +88,6 @@ int main() {
// LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
// LAMBDA: br label %[[LAST_DONE]]
// LAMBDA: [[LAST_DONE]]
- // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
- // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// LAMBDA: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
[&]() {
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
@@ -119,13 +119,15 @@ int main() {
// BLOCKS: alloca i{{[0-9]+}},
// BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
- // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+ // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+ // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+ // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
// BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
// BLOCKS: call void {{%.+}}(i8
- // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+ // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
g = 1;
// Check for final copying of private values back to original vars.
// BLOCKS: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -139,8 +141,6 @@ int main() {
// BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
// BLOCKS: br label %[[LAST_DONE]]
// BLOCKS: [[LAST_DONE]]
- // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
- // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// BLOCKS: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
g = 1;
^{
@@ -261,9 +261,11 @@ int main() {
// Check for default initialization.
// CHECK-NOT: [[X_PRIV]]
-// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// <Skip loop body>
-// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
// Check for final copying of private values back to original vars.
// CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -278,8 +280,7 @@ int main() {
// CHECK-NEXT: br label %[[LAST_DONE]]
// CHECK: [[LAST_DONE]]
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK: ret void
diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp
index 63ae11bea53..3942fad9328 100644
--- a/clang/test/OpenMP/parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_codegen.cpp
@@ -47,11 +47,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: br label %{{.+}}
}
// CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}
@@ -95,11 +91,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: br label %{{.+}}
}
// CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}
@@ -162,11 +154,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]]
// CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}
@@ -181,8 +169,6 @@ void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
//
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -214,8 +200,6 @@ void dynamic1(float *a, float *b, float *c, float *d) {
}
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}
@@ -230,8 +214,6 @@ void guided7(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7)
//
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -263,8 +245,6 @@ void guided7(float *a, float *b, float *c, float *d) {
}
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}
@@ -332,8 +312,6 @@ void runtime(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
//
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp
index 0111a27f774..c8998ebda03 100644
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -176,28 +176,10 @@ void simple(float *a, float *b, float *c, float *d) {
}
// CHECK: [[SIMPLE_LOOP5_END]]
+// CHECK-NOT: mul i32 %{{.+}}, 10
#pragma omp simd
-// FIXME: I think we would get wrong result using 'unsigned' in the loop below.
-// So we'll need to add zero trip test for 'unsigned' counters.
-//
-// CHECK: store i32 0, i32* [[OMP_IV6:%[^,]+]]
-
-// CHECK: [[IV6:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP6:%.+]] = icmp slt i32 [[IV6]], -8
-// CHECK-NEXT: br i1 [[CMP6]], label %[[SIMPLE_LOOP6_BODY:.+]], label %[[SIMPLE_LOOP6_END:[^,]+]]
- for (int i=100; i<10; i+=10) {
-// CHECK: [[SIMPLE_LOOP6_BODY]]
-// Start of body: calculate i from IV:
-// CHECK: [[IV6_0:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV6_0]], 10
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 100, [[LC_IT_1]]
-// CHECK-NEXT: store i32 [[LC_IT_2]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-
-// CHECK: [[IV6_2:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[ADD6_2:%.+]] = add nsw i32 [[IV6_2]], 1
-// CHECK-NEXT: store i32 [[ADD6_2]], i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
+ for (unsigned i=100; i<10; i+=10) {
}
-// CHECK: [[SIMPLE_LOOP6_END]]
int A;
#pragma omp simd lastprivate(A)
@@ -205,8 +187,6 @@ void simple(float *a, float *b, float *c, float *d) {
// Test checks that one iteration is separated in presence of lastprivate.
//
// CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
-// CHECK: br i1 true, label %[[SIMPLE_IF7_THEN:.+]], label %[[SIMPLE_IF7_END:[^,]+]]
-// CHECK: [[SIMPLE_IF7_THEN]]
// CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
// CHECK: [[SIMD_LOOP7_COND]]
// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
@@ -233,9 +213,6 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK: [[LOAD_I:%.+]] = load i64, i64* [[ADDR_I]]
// CHECK-NEXT: [[CONV_I:%.+]] = trunc i64 [[LOAD_I]] to i32
//
-// CHECK: br label %[[SIMPLE_IF7_END]]
-// CHECK: [[SIMPLE_IF7_END]]
-//
// CHECK: ret void
}