summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/AST/OpenMPClause.h23
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h3
-rw-r--r--clang/lib/AST/OpenMPClause.cpp12
-rw-r--r--clang/lib/AST/StmtProfile.cpp7
-rw-r--r--clang/lib/CodeGen/CGExpr.cpp33
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp41
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h19
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp5
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp46
-rw-r--r--clang/lib/Serialization/ASTReader.cpp5
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp2
-rw-r--r--clang/test/OpenMP/distribute_simd_codegen.cpp12
-rw-r--r--clang/test/OpenMP/for_simd_codegen.cpp5
-rw-r--r--clang/test/OpenMP/simd_codegen.cpp59
-rw-r--r--clang/test/OpenMP/target_parallel_for_simd_codegen.cpp5
-rw-r--r--clang/test/OpenMP/target_simd_codegen.cpp150
-rw-r--r--clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp13
-rw-r--r--clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp4
-rw-r--r--clang/test/OpenMP/teams_distribute_simd_codegen.cpp8
-rw-r--r--clang/tools/libclang/CIndex.cpp2
20 files changed, 339 insertions, 115 deletions
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index c708b011849..7cef0bc9092 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -6275,6 +6275,15 @@ class OMPNontemporalClause final
OMPC_nontemporal, SourceLocation(), SourceLocation(),
SourceLocation(), N) {}
+ /// Get the list of privatied copies if the member expression was captured by
+ /// one of the privatization clauses.
+ MutableArrayRef<Expr *> getPrivateRefs() {
+ return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
+ }
+ ArrayRef<const Expr *> getPrivateRefs() const {
+ return llvm::makeArrayRef(varlist_end(), varlist_size());
+ }
+
public:
/// Creates clause with a list of variables \a VL.
///
@@ -6293,6 +6302,10 @@ public:
/// \param N The number of variables.
static OMPNontemporalClause *CreateEmpty(const ASTContext &C, unsigned N);
+ /// Sets the list of references to private copies created in private clauses.
+ /// \param VL List of references.
+ void setPrivateRefs(ArrayRef<Expr *> VL);
+
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
reinterpret_cast<Stmt **>(varlist_end()));
@@ -6303,6 +6316,16 @@ public:
return const_child_range(Children.begin(), Children.end());
}
+ child_range private_refs() {
+ return child_range(reinterpret_cast<Stmt **>(getPrivateRefs().begin()),
+ reinterpret_cast<Stmt **>(getPrivateRefs().end()));
+ }
+
+ const_child_range private_refs() const {
+ auto Children = const_cast<OMPNontemporalClause *>(this)->private_refs();
+ return const_child_range(Children.begin(), Children.end());
+ }
+
child_range used_children() {
return child_range(child_iterator(), child_iterator());
}
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index a93a01da34c..19dd62b0fe0 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3378,6 +3378,9 @@ template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPNontemporalClause(
OMPNontemporalClause *C) {
TRY_TO(VisitOMPClauseList(C));
+ for (auto *E : C->private_refs()) {
+ TRY_TO(TraverseStmt(E));
+ }
return true;
}
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index f062c3e463a..790b4401229 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1162,8 +1162,8 @@ OMPNontemporalClause *OMPNontemporalClause::Create(const ASTContext &C,
SourceLocation LParenLoc,
SourceLocation EndLoc,
ArrayRef<Expr *> VL) {
- // Allocate space for nontemporal variables.
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+ // Allocate space for nontemporal variables + private references.
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * VL.size()));
auto *Clause =
new (Mem) OMPNontemporalClause(StartLoc, LParenLoc, EndLoc, VL.size());
Clause->setVarRefs(VL);
@@ -1172,10 +1172,16 @@ OMPNontemporalClause *OMPNontemporalClause::Create(const ASTContext &C,
OMPNontemporalClause *OMPNontemporalClause::CreateEmpty(const ASTContext &C,
unsigned N) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * N));
return new (Mem) OMPNontemporalClause(N);
}
+void OMPNontemporalClause::setPrivateRefs(ArrayRef<Expr *> VL) {
+ assert(VL.size() == varlist_size() && "Number of private references is not "
+ "the same as the preallocated buffer");
+ std::copy(VL.begin(), VL.end(), varlist_end());
+}
+
//===----------------------------------------------------------------------===//
// OpenMP clauses printing methods
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 56a65e8deb9..2aa5106e90f 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -769,10 +769,13 @@ void OMPClauseProfiler::VisitOMPIsDevicePtrClause(
const OMPIsDevicePtrClause *C) {
VisitOMPClauseList(C);
}
-void OMPClauseProfiler::VisitOMPNontemporalClause(const OMPNontemporalClause *C) {
+void OMPClauseProfiler::VisitOMPNontemporalClause(
+ const OMPNontemporalClause *C) {
VisitOMPClauseList(C);
+ for (auto *E : C->private_refs())
+ Profiler->VisitStmt(E);
}
-}
+} // namespace
void
StmtProfiler::VisitOMPExecutableDirective(const OMPExecutableDirective *S) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index cd42faefab0..c729037852e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2566,21 +2566,35 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
VD = VD->getCanonicalDecl();
if (auto *FD = LambdaCaptureFields.lookup(VD))
return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
- else if (CapturedStmtInfo) {
+ if (CapturedStmtInfo) {
auto I = LocalDeclMap.find(VD);
if (I != LocalDeclMap.end()) {
+ LValue CapLVal;
if (VD->getType()->isReferenceType())
- return EmitLoadOfReferenceLValue(I->second, VD->getType(),
- AlignmentSource::Decl);
- return MakeAddrLValue(I->second, T);
+ CapLVal = EmitLoadOfReferenceLValue(I->second, VD->getType(),
+ AlignmentSource::Decl);
+ else
+ CapLVal = MakeAddrLValue(I->second, T);
+ // Mark lvalue as nontemporal if the variable is marked as nontemporal
+ // in simd context.
+ if (getLangOpts().OpenMP &&
+ CGM.getOpenMPRuntime().isNontemporalDecl(VD))
+ CapLVal.setNontemporal(/*Value=*/true);
+ return CapLVal;
}
LValue CapLVal =
EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
CapturedStmtInfo->getContextValue());
- return MakeAddrLValue(
+ CapLVal = MakeAddrLValue(
Address(CapLVal.getPointer(*this), getContext().getDeclAlign(VD)),
CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl),
CapLVal.getTBAAInfo());
+ // Mark lvalue as nontemporal if the variable is marked as nontemporal
+ // in simd context.
+ if (getLangOpts().OpenMP &&
+ CGM.getOpenMPRuntime().isNontemporalDecl(VD))
+ CapLVal.setNontemporal(/*Value=*/true);
+ return CapLVal;
}
assert(isa<BlockDecl>(CurCodeDecl));
@@ -3929,6 +3943,15 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
if (auto *Field = dyn_cast<FieldDecl>(ND)) {
LValue LV = EmitLValueForField(BaseLV, Field);
setObjCGCLValueClass(getContext(), E, LV);
+ if (getLangOpts().OpenMP) {
+ // If the member was explicitly marked as nontemporal, mark it as
+ // nontemporal. If the base lvalue is marked as nontemporal, mark access
+ // to children as nontemporal too.
+ if ((IsWrappedCXXThis(BaseExpr) &&
+ CGM.getOpenMPRuntime().isNontemporalDecl(Field)) ||
+ BaseLV.isNontemporal())
+ LV.setNontemporal(/*Value=*/true);
+ }
return LV;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 1aae18b4504..2a53e999353 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -17,6 +17,7 @@
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
@@ -11341,6 +11342,46 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
return true;
}
+CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
+ CodeGenModule &CGM, const OMPLoopDirective &S)
+ : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush)
+ return;
+ NontemporalDeclsSet &DS =
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
+ for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
+ for (const Stmt *Ref : C->private_refs()) {
+ const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
+ const ValueDecl *VD;
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
+ VD = DRE->getDecl();
+ } else {
+ const auto *ME = cast<MemberExpr>(SimpleRefExpr);
+ assert((ME->isImplicitCXXThis() ||
+ isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
+ "Expected member of current class.");
+ VD = ME->getMemberDecl();
+ }
+ DS.insert(VD);
+ }
+ }
+}
+
+CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
+}
+
+bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+
+ return llvm::any_of(
+ CGM.getOpenMPRuntime().NontemporalDeclsStack,
+ [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
+}
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index d806c27fdcb..f1582457ed2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -211,6 +211,16 @@ public:
~DisableAutoDeclareTargetRAII();
};
+ /// Manages list of nontemporal decls for the specified directive.
+ class NontemporalDeclsRAII {
+ CodeGenModule &CGM;
+ const bool NeedToPush;
+
+ public:
+ NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S);
+ ~NontemporalDeclsRAII();
+ };
+
protected:
CodeGenModule &CGM;
StringRef FirstSeparator, Separator;
@@ -650,6 +660,11 @@ private:
std::pair<GlobalDecl, GlobalDecl>>
DeferredVariantFunction;
+ using NontemporalDeclsSet = llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>>;
+ /// Stack for list of declarations in current context marked as nontemporal.
+ /// The set is the union of all current stack elements.
+ llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
+
/// Flag for keeping track of weather a requires unified_shared_memory
/// directive is present.
bool HasRequiresUnifiedSharedMemory = false;
@@ -1663,6 +1678,10 @@ public:
/// Emits the definition of the declare variant function.
virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition);
+
+ /// Checks if the \p VD variable is marked as nontemporal declaration in
+ /// current context.
+ bool isNontemporalDecl(const ValueDecl *VD) const;
};
/// Class supports emissionof SIMD-only code.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index e8a3790e3b5..a2a59b64ddb 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1803,8 +1803,9 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
const RegionCodeGenTy &SimdInitGen,
const RegionCodeGenTy &BodyCodeGen) {
- auto &&ThenGen = [&SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
SimdInitGen(CGF);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 915bf53d06e..7365fda62be 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2230,6 +2230,11 @@ void Sema::EndOpenMPClause() {
static void checkAllocateClauses(Sema &S, DSAStackTy *Stack,
ArrayRef<OMPClause *> Clauses);
+static std::pair<ValueDecl *, bool>
+getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
+ SourceRange &ERange, bool AllowArraySection = false);
+static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
+ bool WithInit);
void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
// OpenMP [2.14.3.5, Restrictions, C/C++, p.1]
@@ -2274,6 +2279,31 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
}
}
Clause->setPrivateCopies(PrivateCopies);
+ continue;
+ }
+ // Finalize nontemporal clause by handling private copies, if any.
+ if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) {
+ SmallVector<Expr *, 8> PrivateRefs;
+ for (Expr *RefExpr : Clause->varlists()) {
+ assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second)
+ // It will be analyzed later.
+ PrivateRefs.push_back(RefExpr);
+ ValueDecl *D = Res.first;
+ if (!D)
+ continue;
+
+ const DSAStackTy::DSAVarData DVar =
+ DSAStack->getTopDSA(D, /*FromParent=*/false);
+ PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy
+ : SimpleRefExpr);
+ }
+ Clause->setPrivateRefs(PrivateRefs);
+ continue;
}
}
// Check allocate clauses.
@@ -4262,9 +4292,10 @@ static bool checkIfClauses(Sema &S, OpenMPDirectiveKind Kind,
return ErrorFound;
}
-static std::pair<ValueDecl *, bool>
-getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
- SourceRange &ERange, bool AllowArraySection = false) {
+static std::pair<ValueDecl *, bool> getPrivateItem(Sema &S, Expr *&RefExpr,
+ SourceLocation &ELoc,
+ SourceRange &ERange,
+ bool AllowArraySection) {
if (RefExpr->isTypeDependent() || RefExpr->isValueDependent() ||
RefExpr->containsUnexpandedParameterPack())
return std::make_pair(nullptr, true);
@@ -17172,8 +17203,6 @@ OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef<Expr *> VarList,
if (!D)
continue;
- auto *VD = dyn_cast<VarDecl>(D);
-
// OpenMP 5.0, 2.9.3.1 simd Construct, Restrictions.
// A list-item cannot appear in more than one nontemporal clause.
if (const Expr *PrevRef =
@@ -17185,12 +17214,7 @@ OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef<Expr *> VarList,
continue;
}
- DeclRefExpr *Ref = nullptr;
- if (!VD && isOpenMPCapturedDecl(D) && !CurContext->isDependentContext())
- Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
- Vars.push_back((VD || !Ref || CurContext->isDependentContext())
- ? RefExpr->IgnoreParens()
- : Ref);
+ Vars.push_back(RefExpr);
}
if (Vars.empty())
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9258a6fb126..032ce888969 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -12466,4 +12466,9 @@ void OMPClauseReader::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
+ Vars.clear();
+ Vars.reserve(NumVars);
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Record.readSubExpr());
+ C->setPrivateRefs(Vars);
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a632072d01b..2e2ce0e875d 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -6544,4 +6544,6 @@ void OMPClauseWriter::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
Record.AddStmt(VE);
+ for (auto *E : C->private_refs())
+ Record.AddStmt(E);
}
diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp
index f0128a977d6..104b1ba91b6 100644
--- a/clang/test/OpenMP/distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_codegen.cpp
@@ -143,7 +143,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
#pragma omp target
#pragma omp teams
#ifdef OMP5
- #pragma omp distribute simd dist_schedule(static) safelen(32) if(simd: true)
+ #pragma omp distribute simd dist_schedule(static) safelen(32) if(simd: true) nontemporal(a, b)
#else
#pragma omp distribute simd dist_schedule(static) safelen(32)
#endif // OMP5
@@ -189,6 +189,11 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[BBINNBODY]]:
// CHECK: {{.+}} = load i32, i32* [[IV]]
// ... loop body ...
+// OMP45-NOT: !nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// OMP50-NOT: !nontemporal
+
// CHECK: br label %[[BBBODYCONT:.+]]
// CHECK: [[BBBODYCONT]]:
// CHECK: br label %[[BBINNINC:.+]]
@@ -271,7 +276,7 @@ void test_precond() {
#pragma omp target
#pragma omp teams
#ifdef OMP5
- #pragma omp distribute simd linear(i) if(a)
+ #pragma omp distribute simd linear(i) if(a) nontemporal(i)
#else
#pragma omp distribute simd linear(i)
#endif // OMP5
@@ -293,6 +298,9 @@ void test_precond() {
// CHECK: br i1 [[ACMP]], label %[[PRECOND_THEN:.+]], label %[[PRECOND_END:.+]]
// CHECK: [[PRECOND_THEN]]
// CHECK: call void @__kmpc_for_static_init_4
+// OMP45-NOT: !nontemporal
+// OMP50: store i8 {{.*}}!nontemporal
+// OMP50-NOT: !nontemporal
// CHECK: call void @__kmpc_for_static_fini
// CHECK: [[PRECOND_END]]
diff --git a/clang/test/OpenMP/for_simd_codegen.cpp b/clang/test/OpenMP/for_simd_codegen.cpp
index 4bc6b362f02..a668cb77a06 100644
--- a/clang/test/OpenMP/for_simd_codegen.cpp
+++ b/clang/test/OpenMP/for_simd_codegen.cpp
@@ -333,7 +333,7 @@ void simple(float *a, float *b, float *c, float *d) {
// OMP50: br i1 [[COND]], label {{%?}}[[THEN:[^,]+]], label {{%?}}[[ELSE:[^,]+]]
// OMP50: [[THEN]]:
#ifdef OMP5
- #pragma omp for simd reduction(*:R) if (simd:A)
+ #pragma omp for simd reduction(*:R) if (simd:A) nontemporal(R)
#else
#pragma omp for simd reduction(*:R)
#endif
@@ -366,7 +366,8 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
-// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],
+// OMP45: store i32 %{{.+}}, i32* [[R_PRIV]],
+// OMP50: store i32 %{{.+}}, i32* [[R_PRIV]],{{.*}}!nontemporal
R *= i;
// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]
// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp
index 19ed323af42..aac76ad127d 100644
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -22,10 +22,15 @@
long long get_val() { return 0; }
double *g_ptr;
+struct S {
+ int a, b;
+};
+
// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void simple(float *a, float *b, float *c, float *d) {
+ S s, *p;
#ifdef OMP5
- #pragma omp simd if (simd: true)
+ #pragma omp simd if (simd: true) nontemporal(a, b, c, d, s)
#else
#pragma omp simd
#endif
@@ -43,8 +48,17 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.access.group
// ... loop body ...
// End of body: store into a[i]:
+// OMP45-NOT: load float*,{{.*}}!nontemporal
+// CHECK-NOT: load float,{{.*}}!nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// OMP50: load i32,{{.*}}!nontemporal
+// OMP50-NOT: load i32,{{.*}}!nontemporal
+// OMP50: load float*,{{.*}}!nontemporal
+// CHECK-NOT: load float,{{.*}}!nontemporal
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.access.group
- a[i] = b[i] * c[i] * d[i];
+ a[i] = b[i] * c[i] * d[i] + s.a + p->a;
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
@@ -718,6 +732,47 @@ void linear(float *a) {
//
}
+#ifdef OMP5
+// OMP50-LABEL: inner_simd
+void inner_simd() {
+ double a, b;
+#pragma omp simd nontemporal(a)
+ for (int i = 0; i < 10; ++i) {
+#pragma omp simd nontemporal(b)
+ for (int k = 0; k < 10; ++k) {
+ // OMP50: load double,{{.*}}!nontemporal
+ // OMP50: store double{{.*}}!nontemporal
+ a = b;
+ }
+ // OMP50-NOT: load double,{{.*}}!nontemporal
+ // OMP50: load double,
+ // OMP50: store double{{.*}}!nontemporal
+ a = b;
+ }
+}
+
+extern struct T t;
+struct Base {
+ float a;
+};
+struct T : public Base {
+ void foo() {
+#pragma omp simd nontemporal(Base::a)
+ for (int i = 0; i < 10; ++i) {
+ // OMP50: store float{{.*}}!nontemporal
+ // OMP50-NOT: nontemporal
+ // OMP50-NEXT: store float
+ Base::a = 0;
+ t.a = 0;
+ }
+ }
+} t;
+
+void bartfoo() {
+ t.foo();
+}
+
+#endif // OMP5
// TERM_DEBUG-LABEL: bar
int bar() {return 0;};
diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
index 5e1ed2d6275..055d5dce28b 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -539,7 +539,7 @@ struct S1 {
short int c[2][n];
#ifdef OMP5
- #pragma omp target parallel for simd if(n>60)
+ #pragma omp target parallel for simd if(n>60) nontemporal(a)
#else
#pragma omp target parallel for simd if(target: n>60)
#endif // OMP5
@@ -837,6 +837,9 @@ int bar(int n){
// OMP45: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
// OMP50: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}, i[[SZ]] %{{.+}})
// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+// OMP45-NOT: !nontemporal
+// OMP50: store double{{.*}}!nontemporal
+// OMP50: load double{{.*}}!nontemporal
// CHECK: define internal void [[HVT6]]
diff --git a/clang/test/OpenMP/target_simd_codegen.cpp b/clang/test/OpenMP/target_simd_codegen.cpp
index 6755b7e657e..597cff7815a 100644
--- a/clang/test/OpenMP/target_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_simd_codegen.cpp
@@ -85,8 +85,8 @@
// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 547]
// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i64] [i64 4, i64 2, i64 1, i64 40]
// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 800, i64 800, i64 800, i64 547]
-// OMP45-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976711171, i64 800, i64 800, i64 800, i64 547]
-// OMP50-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976711171, i64 800, i64 800, i64 800, i64 547, i64 800]
+// OMP45-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547]
+// OMP50-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [6 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547, i64 800]
// CHECK-DAG: @{{.*}} = weak constant i8 0
// CHECK-DAG: @{{.*}} = weak constant i8 0
// CHECK-DAG: @{{.*}} = weak constant i8 0
@@ -461,7 +461,11 @@ struct S1 {
int b = n+1;
short int c[2][n];
- #pragma omp target simd if(n>60)
+#ifdef OMP5
+ #pragma omp target simd if(n>60) nontemporal(a) private(a)
+#else
+ #pragma omp target simd if(n>60) private(a)
+#endif // OMP5
for (unsigned long long it = 2000; it >= 600; it -= 400) {
this->a = (double)b + 1.5;
c[1][1] = ++a;
@@ -519,96 +523,84 @@ int bar(int n){
// CHECK-32: [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
// CHECK-32: [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
-// OMP45-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
-// OMP50-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
-// OMP45-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
-// OMP45-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
-// OMP45-DAG: [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
-// OMP45-DAG: [[SADDR0:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
-// OMP45-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX0]]
-// OMP45-DAG: [[PADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX0]]
-// OMP45-DAG: [[SADDR1:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
-// OMP45-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX1]]
-// OMP45-DAG: [[PADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX1]]
-// OMP45-DAG: [[SADDR2:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
-// OMP45-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX2]]
-// OMP45-DAG: [[PADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX2]]
-// OMP45-DAG: [[SADDR3:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
-// OMP45-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX3]]
-// OMP45-DAG: [[PADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX3]]
-// OMP45-DAG: [[SADDR4:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
-// OMP45-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX4]]
-// OMP45-DAG: [[PADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX4]]
-// OMP45-DAG: [[SADDR5:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
-// OMP45-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX5]]
-// OMP45-DAG: [[PADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX5]]
-// OMP50-DAG: [[BPR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP:%.+]], i32 0, i32 0
-// OMP50-DAG: [[PR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P:%.+]], i32 0, i32 0
-// OMP50-DAG: [[SR]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S:%.+]], i32 0, i32 0
-// OMP50-DAG: [[SADDR0:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
-// OMP50-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX0]]
-// OMP50-DAG: [[PADDR0:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX0]]
-// OMP50-DAG: [[SADDR1:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
-// OMP50-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX1]]
-// OMP50-DAG: [[PADDR1:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX1]]
-// OMP50-DAG: [[SADDR2:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
-// OMP50-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX2]]
-// OMP50-DAG: [[PADDR2:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX2]]
-// OMP50-DAG: [[SADDR3:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
-// OMP50-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX3]]
-// OMP50-DAG: [[PADDR3:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX3]]
-// OMP50-DAG: [[SADDR4:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
-// OMP50-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX4]]
-// OMP50-DAG: [[PADDR4:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX4]]
-// OMP50-DAG: [[SADDR5:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
-// OMP50-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX5]]
-// OMP50-DAG: [[PADDR5:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX5]]
-// OMP50-DAG: [[SADDR6:%.+]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S]], i32 [[IDX6:[0-9]+]]
-// OMP50-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP]], i32 [[IDX6]]
-// OMP50-DAG: [[PADDR6:%.+]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P]], i32 [[IDX6]]
+// OMP45-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
+// OMP50-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
+// OMP45-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// OMP45-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// OMP45-DAG: [[SR]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S:%.+]], i32 0, i32 0
+// OMP45-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
+// OMP45-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// OMP45-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// OMP45-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
+// OMP45-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// OMP45-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// OMP45-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
+// OMP45-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// OMP45-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// OMP45-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
+// OMP45-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// OMP45-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+// OMP45-DAG: [[SADDR4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
+// OMP45-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX4]]
+// OMP45-DAG: [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX4]]
+// OMP50-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
+// OMP50-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
+// OMP50-DAG: [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
+// OMP50-DAG: [[SADDR0:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
+// OMP50-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX0]]
+// OMP50-DAG: [[PADDR0:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX0]]
+// OMP50-DAG: [[SADDR1:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
+// OMP50-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX1]]
+// OMP50-DAG: [[PADDR1:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX1]]
+// OMP50-DAG: [[SADDR2:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
+// OMP50-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX2]]
+// OMP50-DAG: [[PADDR2:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX2]]
+// OMP50-DAG: [[SADDR3:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
+// OMP50-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX3]]
+// OMP50-DAG: [[PADDR3:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX3]]
+// OMP50-DAG: [[SADDR4:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
+// OMP50-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX4]]
+// OMP50-DAG: [[PADDR4:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX4]]
+// OMP50-DAG: [[SADDR5:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
+// OMP50-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX5]]
+// OMP50-DAG: [[PADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX5]]
// The names below are not necessarily consistent with the names used for the
// addresses above as some are repeated.
// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR0:%.+]],
-// CHECK-DAG: store double* %{{.+}}, double** [[CPADDR0:%.+]],
+// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR0:%.+]],
// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
-// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to double**
-// CHECK-DAG: store i64 %{{.+}}, i64* {{%[^,]+}}
-
-// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR1:%.+]],
-// CHECK-DAG: store double* %{{.+}}, double** [[CPADDR1:%.+]],
-// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to [[S1]]**
-// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to double**
+// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
-// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
-// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG: store i64 4, i64* {{%[^,]+}}
+
+// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR2:%.+]],
// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG: store i64 4, i64* {{%[^,]+}}
+// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
-// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR3:%.+]],
-// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR3:%.+]],
+// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR3:%.+]],
+// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR3:%.+]],
// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
-// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR4:%.+]],
-// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR4:%.+]],
-// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG: store i64 {{4|8}}, i64* {{%[^,]+}}
-
-// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR5:%.+]],
-// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR5:%.+]],
-// CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i16**
-// CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
// CHECK-DAG: store i64 [[CSIZE]], i64* {{%[^,]+}}
-// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CBPADDR6:%.+]],
-// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CPADDR6:%.+]],
-// OMP50-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// OMP50-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CBPADDR5:%.+]],
+// OMP50-DAG: store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CPADDR5:%.+]],
+// OMP50-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// OMP50-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
// OMP50-DAG: store i64 1, i64* {{%[^,]+}}
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
@@ -738,6 +730,10 @@ int bar(int n){
// OMP50-DAG: [[CONV_COND:%.+]] = bitcast i[[SZ]]* [[LOCAL_SIMD_COND_CASTED]] to i8*
// OMP50-DAG: [[SIMD_COND:%.+]] = load i8, i8* [[CONV_COND]],
// OMP50-DAG: trunc i8 [[SIMD_COND]] to i1
+// OMP45-NOT: !nontemporal
+// OMP50: store double {{.*}}!nontemporal
+// OMP50: load double, {{.*}}!nontemporal
+// OMP50: store double {{.*}}!nontemporal
// CHECK: define internal void [[HVT6]]
// Create local storage for each capture.
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index 8a8d64e36f8..dda468e604e 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -35,20 +35,25 @@ void gtid_test() {
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#ifdef OMP5
-#pragma omp target teams distribute parallel for simd if(simd: true)
+#pragma omp target teams distribute parallel for simd if(simd: true) nontemporal(Arg)
#else
#pragma omp target teams distribute parallel for simd
#endif // OMP5
- for(int i = 0 ; i < 100; i++) {}
+ for (int i = 0; i < 100; i++) {
+ Arg = 0;
+ }
// CHECK: define internal void [[OFFLOADING_FUN_0]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
// CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+ // OMP50: load i32,{{.*}}!nontemporal
+ // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 3, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
// CHECK: call void @__kmpc_for_static_init_4(
+ // OMP45-NOT: !nontemporal
+ // OMP50: store i32 0,{{.*}}!nontemporal
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret
#pragma omp target teams distribute parallel for simd if (parallel: false)
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
index 14c3cd62ebf..4912352e17c 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -165,7 +165,7 @@ int foo(int n) {
// CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
#ifdef OMP5
- #pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a) if(simd: 1)
+ #pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a) if(simd: 1) nontemporal(a)
#else
#pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a)
#endif // OMP5
@@ -395,6 +395,8 @@ int foo(int n) {
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// OMP45-NOT: !nontemporal
+// OMP50: load i32,{{.*}}!nontemporal
// CHECK-64: store i32 10, i32* [[AA_CADDR]], align
// CHECK-32: store i32 10, i32* [[AA_ADDR]], align
// CHECK: ret void
diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index b9ee8cc728f..63dd237a964 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -177,16 +177,16 @@ struct SS{
// CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
int foo(void) {
- // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+ // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
// CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
#pragma omp target
#ifdef OMP5
- #pragma omp teams distribute simd if(b)
+ #pragma omp teams distribute simd if(b) nontemporal(a, b)
#else
#pragma omp teams distribute simd
#endif // OMP5
for(int i = 0; i < X; i++) {
- a[i] = (T)0;
+ a[i] = (T)b;
}
// outlined target region
@@ -197,6 +197,8 @@ struct SS{
// CK3: define internal void @[[OUTL1]]({{.+}})
// CK3: call void @__kmpc_for_static_init_4(
+ // OMP3_45-NOT: !nontemporal
+ // OMP3_50: load float,{{.*}}!nontemporal
// CK3: call void @__kmpc_for_static_fini(
// CK3: ret void
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 229abece37a..94084e169fa 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2458,6 +2458,8 @@ void OMPClauseEnqueue::VisitOMPIsDevicePtrClause(const OMPIsDevicePtrClause *C)
void OMPClauseEnqueue::VisitOMPNontemporalClause(
const OMPNontemporalClause *C) {
VisitOMPClauseList(C);
+ for (const auto *E : C->private_refs())
+ Visitor->AddStmt(E);
}
}
OpenPOWER on IntegriCloud