diff options
| author | Michael Zolotukhin <mzolotukhin@apple.com> | 2015-09-08 23:52:33 +0000 |
|---|---|---|
| committer | Michael Zolotukhin <mzolotukhin@apple.com> | 2015-09-08 23:52:33 +0000 |
| commit | 84df12375cdec370fd09a99e3de2e788351ff403 (patch) | |
| tree | e1f89510fbda184151560b0c8246103ff7d5f7c9 /clang/lib/CodeGen | |
| parent | 00691e3169fc90e4202c6a592e9fda20e006c299 (diff) | |
| download | bcm5719-llvm-84df12375cdec370fd09a99e3de2e788351ff403.tar.gz bcm5719-llvm-84df12375cdec370fd09a99e3de2e788351ff403.zip | |
Introduce __builtin_nontemporal_store and __builtin_nontemporal_load.
Summary:
Currently clang provides no general way to generate nontemporal loads/stores.
There are some architecture-specific builtins for doing so (e.g. on x86), but
there is no way to generate a nontemporal store on, e.g., AArch64. This patch adds
generic builtins which are expanded to a simple load or store with '!nontemporal'
metadata in IR.
Differential Revision: http://reviews.llvm.org/D12313
llvm-svn: 247104
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 26 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGExpr.cpp | 22 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGValue.h | 7 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 5 |
4 files changed, 54 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c35f25ad103..729c0a18b06 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, return EmitFromInt(CGF, Result, T, ValueType); } +static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + Value *Address = CGF.EmitScalarExpr(E->getArg(1)); + + // Convert the type of the pointer to a pointer to the stored type. + Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); + Value *BC = CGF.Builder.CreateBitCast( + Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); + LV.setNontemporal(true); + CGF.EmitStoreOfScalar(Val, LV, false); + return nullptr; +} + +static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Address = CGF.EmitScalarExpr(E->getArg(0)); + + LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); + LV.setNontemporal(true); + return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); +} + static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E) { @@ -1143,6 +1165,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_nontemporal_load: + return RValue::get(EmitNontemporalLoad(*this, E)); + case Builtin::BI__builtin_nontemporal_store: + return RValue::get(EmitNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". 
For the diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6635e570c64..b7d1ef19b15 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1160,7 +1160,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), Loc, lvalue.getAlignmentSource(), lvalue.getTBAAInfo(), - lvalue.getTBAABaseType(), lvalue.getTBAAOffset()); + lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), + lvalue.isNontemporal()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -1226,7 +1227,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, AlignmentSource AlignSource, llvm::MDNode *TBAAInfo, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNontemporal) { // For better performance, handle vector loads differently. if (Ty->isVectorType()) { const llvm::Type *EltTy = Addr.getElementType(); @@ -1258,6 +1260,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, } llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile); + if (isNontemporal) { + llvm::MDNode *Node = llvm::MDNode::get( + Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + } if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1330,7 +1337,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, AlignmentSource AlignSource, llvm::MDNode *TBAAInfo, bool isInit, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNontemporal) { // Handle vectors differently to get better performance. 
if (Ty->isVectorType()) { @@ -1365,6 +1373,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, } llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); + if (isNontemporal) { + llvm::MDNode *Node = + llvm::MDNode::get(Store->getContext(), + llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + } if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1378,7 +1392,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), lvalue.getAlignmentSource(), lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), - lvalue.getTBAAOffset()); + lvalue.getTBAAOffset(), lvalue.isNontemporal()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index 195571ba070..3ccc4cda89f 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -202,6 +202,10 @@ class LValue { unsigned AlignSource : 2; + // This flag shows if a nontemporal load/stores should be used when accessing + // this lvalue. + bool Nontemporal : 1; + Expr *BaseIvarExp; /// Used by struct-path-aware TBAA. @@ -228,6 +232,7 @@ private: // Initialize Objective-C flags. 
this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; this->ImpreciseLifetime = false; + this->Nontemporal = false; this->ThreadLocalRef = false; this->BaseIvarExp = nullptr; @@ -277,6 +282,8 @@ public: void setARCPreciseLifetime(ARCPreciseLifetime_t value) { ImpreciseLifetime = (value == ARCImpreciseLifetime); } + bool isNontemporal() const { return Nontemporal; } + void setNontemporal(bool Value) { Nontemporal = Value; } bool isObjCWeak() const { return Quals.getObjCGCAttr() == Qualifiers::Weak; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 86154e923eb..39259329832 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2449,7 +2449,8 @@ public: AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, + bool isNontemporal = false); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to @@ -2465,7 +2466,7 @@ public: AlignmentSource AlignSource = AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, bool isNontemporal = false); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to |

