diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2017-04-24 17:08:43 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2017-04-24 17:08:43 +0000 |
commit | 9c661853150e304b9c6604df44a0181795ae633f (patch) | |
tree | e6e026dc794a12760af5d1a083b254930e5cea00 /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | |
parent | 87aafa073fcc56a266a97a4de9b86e1e1b2f165a (diff) | |
download | bcm5719-llvm-9c661853150e304b9c6604df44a0181795ae633f.tar.gz bcm5719-llvm-9c661853150e304b9c6604df44a0181795ae633f.zip |
InstCombine/AMDGPU: Fix constant folding of llvm.amdgcn.{icmp,fcmp}
Summary:
The return value of these intrinsics must always have 0 bits for
inactive threads. This means that when all arguments are constant
and the comparison evaluates to true, the intrinsic must return
the current exec mask rather than being folded to an all-ones constant.
Fixes some GL_ARB_shader_ballot tests.
Reviewers: arsenm
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D32344
llvm-svn: 301195
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 22 |
1 file changed, 20 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e7aa1a45737..0ea1c7be422 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *CSrc0 = dyn_cast<Constant>(Src0)) { if (auto *CSrc1 = dyn_cast<Constant>(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) { + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + } + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. |