diff options
author | Vedant Kumar <vsk@apple.com> | 2019-01-04 17:43:22 +0000 |
---|---|---|
committer | Vedant Kumar <vsk@apple.com> | 2019-01-04 17:43:22 +0000 |
commit | a1778df4740fc690cbab62ec74342795b84a9a5c (patch) | |
tree | fabe87231f1cbac065c7b3643171153f65cb9840 /llvm | |
parent | 722466e1f176f22d2339acb750f93498964b07ee (diff) | |
download | bcm5719-llvm-a1778df4740fc690cbab62ec74342795b84a9a5c.tar.gz bcm5719-llvm-a1778df4740fc690cbab62ec74342795b84a9a5c.zip |
[CodeExtractor] Do not extract unsafe lifetime markers
Lifetime markers which reference inputs to the extraction region are not
safe to extract. Example ('rhs' will be extracted):
```
entry:
+------------+
| x = alloca |
| y = alloca |
+------------+
/ \
lhs: rhs:
+-------------------+ +-------------------+
| lifetime_start(x) | | lifetime_start(x) |
| use(x) | | lifetime_start(y) |
| lifetime_end(x) | | use(x, y) |
| lifetime_start(y) | | lifetime_end(y) |
| use(y) | | lifetime_end(x) |
| lifetime_end(y) | +-------------------+
+-------------------+
```
Prior to extraction, the stack coloring pass sees that the slots for 'x'
and 'y' are in-use at the same time. After extraction, the coloring pass
infers that 'x' and 'y' are *not* in-use concurrently, because markers
from 'rhs' are no longer available to help decide otherwise.
This leads to a miscompile, because the stack slots actually are in-use
concurrently in the extracted function.
Fix this by moving lifetime start/end markers for memory regions defined
in the calling function around the call to the extracted function.
Fixes llvm.org/PR39671 (rdar://45939472).
Differential Revision: https://reviews.llvm.org/D55967
llvm-svn: 350420
Diffstat (limited to 'llvm')
5 files changed, 166 insertions, 16 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 498ff5a7883..fee79fdc3bf 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -27,6 +27,7 @@ class BasicBlock; class BlockFrequency; class BlockFrequencyInfo; class BranchProbabilityInfo; +class CallInst; class DominatorTree; class Function; class Instruction; @@ -164,10 +165,9 @@ class Value; DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, BranchProbabilityInfo *BPI); - void emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *newHeader, - ValueSet &inputs, - ValueSet &outputs); + CallInst *emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *newHeader, + ValueSet &inputs, ValueSet &outputs); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 79904e206a0..25d4ae583ec 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -883,9 +883,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. -void CodeExtractor:: -emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, - ValueSet &inputs, ValueSet &outputs) { +CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector<Value *> params, StructValues, ReloadOutputs, Reloads; @@ -893,6 +894,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); const DataLayout &DL = M->getDataLayout(); + CallInst *call = nullptr; // Add inputs as params, or to be filled into the struct for (Value *input : inputs) @@ -943,8 +945,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } // Emit the call to the function - CallInst *call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); + call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); // Add debug location to the new call, if the original function has debug // info. In that case, the terminator of the entry block of the extracted // function contains the first debug location of the extracted function, @@ -1116,6 +1118,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } + + return call; } void CodeExtractor::moveCodeToFunction(Function *newFunction) { @@ -1177,6 +1181,71 @@ void CodeExtractor::calculateNewCallTerminatorWeights( MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } +/// Scan the extraction region for lifetime markers which reference inputs. +/// Erase these markers. Return the inputs which were referenced. +/// +/// The extraction region is defined by a set of blocks (\p Blocks), and a set +/// of allocas which will be moved from the caller function into the extracted +/// function (\p SunkAllocas). +static SetVector<Value *> +eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks, + const SetVector<Value *> &SunkAllocas) { + SetVector<Value *> InputObjectsWithLifetime; + for (BasicBlock *BB : Blocks) { + for (auto It = BB->begin(), End = BB->end(); It != End;) { + auto *II = dyn_cast<IntrinsicInst>(&*It); + ++It; + if (!II || !II->isLifetimeStartOrEnd()) + continue; + + // Get the memory operand of the lifetime marker. If the underlying + // object is a sunk alloca, or is otherwise defined in the extraction + // region, the lifetime marker must not be erased. + Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); + if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) + continue; + + InputObjectsWithLifetime.insert(Mem); + II->eraseFromParent(); + } + } + return InputObjectsWithLifetime; +} + +/// Insert lifetime start/end markers surrounding the call to the new function +/// for objects defined in the caller. +static void insertLifetimeMarkersSurroundingCall( + Module *M, const SetVector<Value *> &InputObjectsWithLifetime, + CallInst *TheCall) { + if (InputObjectsWithLifetime.empty()) + return; + + LLVMContext &Ctx = M->getContext(); + auto Int8PtrTy = Type::getInt8PtrTy(Ctx); + auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); + auto LifetimeStartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + auto LifetimeEndFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_end, Int8PtrTy); + for (Value *Mem : InputObjectsWithLifetime) { + assert((!isa<Instruction>(Mem) || + cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *MemAsI8Ptr = nullptr; + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + + auto StartMarker = + CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr}); + StartMarker->insertBefore(TheCall); + auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr}); + EndMarker->insertAfter(TheCall); + } +} + Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; @@ -1291,11 +1360,17 @@ Function *CodeExtractor::extractCodeRegion() { cast<Instruction>(II)->moveBefore(TI); } + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start/end markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet InputObjectsWithLifetime = + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands); + // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = constructFunction(inputs, outputs, header, - newFuncRoot, - codeReplacer, oldFunction, - oldFunction->getParent()); + Function *newFunction = + constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, + oldFunction, oldFunction->getParent()); // Update the entry count of the function. if (BFI) { @@ -1306,10 +1381,16 @@ Function *CodeExtractor::extractCodeRegion() { BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } - emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + CallInst *TheCall = + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); moveCodeToFunction(newFunction); + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), + InputObjectsWithLifetime, TheCall); + // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll index 6bb38d44f46..04789eaad29 100644 --- a/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll +++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll @@ -6,10 +6,14 @@ @g = external local_unnamed_addr global i32, align 4 +; CHECK-LABEL: define{{.*}}@caller( +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %tmp.i) +; CHECK-NEXT: call void @callee_unknown_use1.{{.*}}(i8* %tmp.i +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %tmp.i) + define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 { ; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}} ; CHECK-NOT: alloca -; CHECK: call void @llvm.lifetime bb: %tmp = alloca i8, align 4 %tmp2 = load i32, i32* @g, align 4, !tbaa !2 diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll index 9c53496e1ce..0bde58fbccd 100644 --- a/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll +++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll @@ -9,7 +9,6 @@ define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 { ; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}} ; CHECK-NOT: alloca -; CHECK: call void @llvm.lifetime bb: %tmp = alloca i32, align 4 %tmp1 = bitcast i32* %tmp to i8* diff --git a/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll new file mode 100644 index 00000000000..c6482f823b4 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @use(i8*) + +declare void @cold_use2(i8*, i8*) cold + +; CHECK-LABEL: define {{.*}}@foo( +define void @foo() { +entry: + %local1 = alloca i256 + %local2 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + %local2_cast = bitcast i256* %local2 to i8* + br i1 undef, label %normalPath, label %outlinedPath + +normalPath: + ; These two uses of stack slots are non-overlapping. Based on this alone, + ; the stack slots could be merged. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + call void @use(i8* %local1_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +; CHECK-LABEL: codeRepl: +; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* +; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK: br i1 + +outlinedPath: + ; These two uses of stack slots are overlapping. This should prevent + ; merging of stack slots. CodeExtractor must replicate the effects of + ; these markers in the caller to inhibit stack coloring. + %gep1 = getelementptr inbounds i8, i8* %local1_cast, i64 1 + call void @llvm.lifetime.start.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @cold_use2(i8* %local1_cast, i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + br i1 undef, label %outlinedPath2, label %outlinedPathExit + +outlinedPath2: + ; These extra lifetime markers are used to test that we emit only one + ; pair of guard markers in the caller per memory object. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +outlinedPathExit: + ret void +} + +; CHECK-LABEL: define {{.*}}@foo.cold.1( +; CHECK-NOT: @llvm.lifetime |