summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2015-01-06 17:41:18 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2015-01-06 17:41:18 +0000
commitf807a6f2970398b85ccd2a3ba6d8959426a9872f (patch)
tree911eb76a03413e1686ee193038b3d84907aef6a6 /llvm
parenta76dfbd4288271d3a61fe2f4307c406780ce7c49 (diff)
downloadbcm5719-llvm-f807a6f2970398b85ccd2a3ba6d8959426a9872f.tar.gz
bcm5719-llvm-f807a6f2970398b85ccd2a3ba6d8959426a9872f.zip
[CodeGenPrepare] Improved logic to speculate calls to cttz/ctlz.
This patch improves the logic added at revision 224899 (see review D6728) that teaches the backend when it is profitable to speculate calls to cttz/ctlz. The original algorithm conservatively avoided speculating more than one instruction from a basic block in a control flow graph modelling an if-statement. In particular, the only allowed instruction (excluding the terminator) was a call to cttz/ctlz. However, there are cases where we could be less conservative and still be able to speculate a call to cttz/ctlz. With this patch, CodeGenPrepare now tries to speculate a cttz/ctlz if the result is zero extended/truncated in the same basic block, and the zext/trunc instruction is "free" for the target. Added new test cases to CodeGen/X86/cttz-ctlz.ll. Differential Revision: http://reviews.llvm.org/D6853 llvm-svn: 225274
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp41
-rw-r--r--llvm/test/CodeGen/X86/cttz-ctlz.ll172
2 files changed, 207 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 51ea7db30ba..be2cb51785f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4008,15 +4008,41 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
// See if ThenBB contains only one instruction (excluding the
// terminator and DbgInfoIntrinsic calls).
IntrinsicInst *II = nullptr;
+ CastInst *CI = nullptr;
for (BasicBlock::iterator I = ThenBB->begin(),
E = std::prev(ThenBB->end()); I != E; ++I) {
// Skip debug info.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (II)
- // Avoid speculating more than one instruction.
- return false;
+ // Check if this is a zero extension or a truncate of a previously
+ // matched call to intrinsic cttz/ctlz.
+ if (II) {
+ // Early exit if we already found a "free" zero extend/truncate.
+ if (CI)
+ return false;
+
+ Type *SrcTy = II->getType();
+ Type *DestTy = I->getType();
+ Value *V;
+
+ if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
+ // Speculate this zero extend only if it is "free" for the target.
+ if (TLI.isZExtFree(SrcTy, DestTy)) {
+ CI = cast<CastInst>(I);
+ continue;
+ }
+ } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
+ // Speculate this truncate only if it is "free" for the target.
+ if (TLI.isTruncateFree(SrcTy, DestTy)) {
+ CI = cast<CastInst>(I);
+ continue;
+ }
+ } else {
+ // Avoid speculating more than one instruction.
+ return false;
+ }
+ }
// See if this is a call to intrinsic cttz/ctlz.
if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
@@ -4041,11 +4067,14 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
- if (!OrigV || ThenV != II)
+ if (!OrigV)
return false;
+ if (ThenV != II && (!CI || ThenV != CI))
+ return false;
+
if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
- unsigned BitWidth = ThenV->getType()->getIntegerBitWidth();
+ unsigned BitWidth = II->getType()->getIntegerBitWidth();
// Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
// intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
@@ -4070,7 +4099,7 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
ConstantInt::getFalse(II->getContext()) };
Module *M = EntryBB->getParent()->getParent();
Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
- IRBuilder<> Builder(BrInst);
+ IRBuilder<> Builder(II);
Instruction *NewI = Builder.CreateCall(IF, Args);
// Replace the old call to cttz/ctlz.
diff --git a/llvm/test/CodeGen/X86/cttz-ctlz.ll b/llvm/test/CodeGen/X86/cttz-ctlz.ll
index 88dcf650a21..8717d401595 100644
--- a/llvm/test/CodeGen/X86/cttz-ctlz.ll
+++ b/llvm/test/CodeGen/X86/cttz-ctlz.ll
@@ -241,6 +241,178 @@ cond.end: ; preds = %entry, %cond.true
ret i16 %cond
}
+; The following tests verify that calls to cttz/ctlz are speculated even if
+; basic block %cond.true has an extra zero extend/truncate which is "free"
+; for the target.
+
+define i64 @test1e(i32 %x) {
+; ALL-LABEL: @test1e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %phitmp2 = zext i32 %0 to i64
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+ ret i64 %cond
+}
+
+define i32 @test2e(i64 %x) {
+; ALL-LABEL: @test2e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i32 %cond
+}
+
+define i64 @test3e(i32 %x) {
+; ALL-LABEL: @test3e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %phitmp2 = zext i32 %0 to i64
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+ ret i64 %cond
+}
+
+define i32 @test4e(i64 %x) {
+; ALL-LABEL: @test4e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i32 %cond
+}
+
+define i16 @test5e(i64 %x) {
+; ALL-LABEL: @test5e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test6e(i32 %x) {
+; ALL-LABEL: @test6e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test7e(i64 %x) {
+; ALL-LABEL: @test7e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test8e(i32 %x) {
+; ALL-LABEL: @test8e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+ ret i16 %cond
+}
+
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
OpenPOWER on IntegriCloud