summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2017-04-24 17:08:43 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2017-04-24 17:08:43 +0000
commit9c661853150e304b9c6604df44a0181795ae633f (patch)
treee6e026dc794a12760af5d1a083b254930e5cea00 /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent87aafa073fcc56a266a97a4de9b86e1e1b2f165a (diff)
downloadbcm5719-llvm-9c661853150e304b9c6604df44a0181795ae633f.tar.gz
bcm5719-llvm-9c661853150e304b9c6604df44a0181795ae633f.zip
InstCombine/AMDGPU: Fix constant folding of llvm.amdgcn.{icmp,fcmp}
Summary: The return value of these intrinsics should always have 0 bits for inactive threads. This means that when all arguments are constant and the comparison evaluates to true, the intrinsic should return the current exec mask. Fixes some GL_ARB_shader_ballot tests. Reviewers: arsenm Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye Differential Revision: https://reviews.llvm.org/D32344 llvm-svn: 301195
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp22
1 files changed, 20 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e7aa1a45737..0ea1c7be422 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
- return replaceInstUsesWith(*II,
- ConstantExpr::getSExt(CCmp, II->getType()));
+ if (CCmp->isNullValue()) {
+ return replaceInstUsesWith(
+ *II, ConstantExpr::getSExt(CCmp, II->getType()));
+ }
+
+ // The result of V_ICMP/V_FCMP assembly instructions (which this
+ // intrinsic exposes) is one bit per thread, masked with the EXEC
+ // register (which contains the bitmask of live threads). So a
+ // comparison that always returns true is the same as a read of the
+ // EXEC register.
+ Value *NewF = Intrinsic::getDeclaration(
+ II->getModule(), Intrinsic::read_register, II->getType());
+ Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
+ MDNode *MD = MDNode::get(II->getContext(), MDArgs);
+ Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
}
// Canonicalize constants to RHS.
OpenPOWER on IntegriCloud