summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorMichael Zolotukhin <mzolotukhin@apple.com>2015-09-08 23:52:33 +0000
committerMichael Zolotukhin <mzolotukhin@apple.com>2015-09-08 23:52:33 +0000
commit84df12375cdec370fd09a99e3de2e788351ff403 (patch)
treee1f89510fbda184151560b0c8246103ff7d5f7c9 /clang/lib/CodeGen
parent00691e3169fc90e4202c6a592e9fda20e006c299 (diff)
downloadbcm5719-llvm-84df12375cdec370fd09a99e3de2e788351ff403.tar.gz
bcm5719-llvm-84df12375cdec370fd09a99e3de2e788351ff403.zip
Introduce __builtin_nontemporal_store and __builtin_nontemporal_load.
Summary: Currently clang provides no general way to generate nontemporal loads/stores. There are some architecture specific builtins for doing so (e.g. in x86), but there is no way to generate non-temporal store on, e.g. AArch64. This patch adds generic builtins which are expanded to a simple store with '!nontemporal' attribute in IR. Differential Revision: http://reviews.llvm.org/D12313 llvm-svn: 247104
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp26
-rw-r--r--clang/lib/CodeGen/CGExpr.cpp22
-rw-r--r--clang/lib/CodeGen/CGValue.h7
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h5
4 files changed, 54 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c35f25ad103..729c0a18b06 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
return EmitFromInt(CGF, Result, T, ValueType);
}
+static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
+ Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+ Value *Address = CGF.EmitScalarExpr(E->getArg(1));
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+ Value *BC = CGF.Builder.CreateBitCast(
+ Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
+ LV.setNontemporal(true);
+ CGF.EmitStoreOfScalar(Val, LV, false);
+ return nullptr;
+}
+
+static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
+ Value *Address = CGF.EmitScalarExpr(E->getArg(0));
+
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
+ LV.setNontemporal(true);
+ return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
+}
+
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
llvm::AtomicRMWInst::BinOp Kind,
const CallExpr *E) {
@@ -1143,6 +1165,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_nontemporal_load:
+ return RValue::get(EmitNontemporalLoad(*this, E));
+ case Builtin::BI__builtin_nontemporal_store:
+ return RValue::get(EmitNontemporalStore(*this, E));
case Builtin::BI__c11_atomic_is_lock_free:
case Builtin::BI__atomic_is_lock_free: {
// Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6635e570c64..b7d1ef19b15 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1160,7 +1160,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
lvalue.getType(), Loc, lvalue.getAlignmentSource(),
lvalue.getTBAAInfo(),
- lvalue.getTBAABaseType(), lvalue.getTBAAOffset());
+ lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
+ lvalue.isNontemporal());
}
static bool hasBooleanRepresentation(QualType Ty) {
@@ -1226,7 +1227,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
AlignmentSource AlignSource,
llvm::MDNode *TBAAInfo,
QualType TBAABaseType,
- uint64_t TBAAOffset) {
+ uint64_t TBAAOffset,
+ bool isNontemporal) {
// For better performance, handle vector loads differently.
if (Ty->isVectorType()) {
const llvm::Type *EltTy = Addr.getElementType();
@@ -1258,6 +1260,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
}
llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
+ if (isNontemporal) {
+ llvm::MDNode *Node = llvm::MDNode::get(
+ Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+ Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ }
if (TBAAInfo) {
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
@@ -1330,7 +1337,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
AlignmentSource AlignSource,
llvm::MDNode *TBAAInfo,
bool isInit, QualType TBAABaseType,
- uint64_t TBAAOffset) {
+ uint64_t TBAAOffset,
+ bool isNontemporal) {
// Handle vectors differently to get better performance.
if (Ty->isVectorType()) {
@@ -1365,6 +1373,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
}
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
+ if (isNontemporal) {
+ llvm::MDNode *Node =
+ llvm::MDNode::get(Store->getContext(),
+ llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+ Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ }
if (TBAAInfo) {
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
@@ -1378,7 +1392,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
lvalue.getType(), lvalue.getAlignmentSource(),
lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
- lvalue.getTBAAOffset());
+ lvalue.getTBAAOffset(), lvalue.isNontemporal());
}
/// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h
index 195571ba070..3ccc4cda89f 100644
--- a/clang/lib/CodeGen/CGValue.h
+++ b/clang/lib/CodeGen/CGValue.h
@@ -202,6 +202,10 @@ class LValue {
unsigned AlignSource : 2;
+ // This flag shows if a nontemporal load/stores should be used when accessing
+ // this lvalue.
+ bool Nontemporal : 1;
+
Expr *BaseIvarExp;
/// Used by struct-path-aware TBAA.
@@ -228,6 +232,7 @@ private:
// Initialize Objective-C flags.
this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
this->ImpreciseLifetime = false;
+ this->Nontemporal = false;
this->ThreadLocalRef = false;
this->BaseIvarExp = nullptr;
@@ -277,6 +282,8 @@ public:
void setARCPreciseLifetime(ARCPreciseLifetime_t value) {
ImpreciseLifetime = (value == ARCImpreciseLifetime);
}
+ bool isNontemporal() const { return Nontemporal; }
+ void setNontemporal(bool Value) { Nontemporal = Value; }
bool isObjCWeak() const {
return Quals.getObjCGCAttr() == Qualifiers::Weak;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 86154e923eb..39259329832 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2449,7 +2449,8 @@ public:
AlignmentSource::Type,
llvm::MDNode *TBAAInfo = nullptr,
QualType TBAABaseTy = QualType(),
- uint64_t TBAAOffset = 0);
+ uint64_t TBAAOffset = 0,
+ bool isNontemporal = false);
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
@@ -2465,7 +2466,7 @@ public:
AlignmentSource AlignSource = AlignmentSource::Type,
llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
QualType TBAABaseTy = QualType(),
- uint64_t TBAAOffset = 0);
+ uint64_t TBAAOffset = 0, bool isNontemporal = false);
/// EmitStoreOfScalar - Store a scalar value to an address, taking
/// care to appropriately convert from the memory representation to
OpenPOWER on IntegriCloud