summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h3
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h19
-rw-r--r--llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll189
3 files changed, 209 insertions, 2 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b90f6888238..364d458b5d0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -421,8 +421,7 @@ public:
return TTI::TCC_Free;
}
- // Otherwise delegate to the fully generic implementations.
- return getOperationCost(
+ return static_cast<T *>(this)->getOperationCost(
Operator::getOpcode(U), U->getType(),
U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f3f6ba21eea..25a74b331de 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -195,6 +195,25 @@ public:
return TargetTransformInfo::TCC_Basic;
}
+ unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
+ const TargetLoweringBase *TLI = getTLI();
+ switch (Opcode) {
+ default: break;
+ case Instruction::Trunc: {
+ if (TLI->isTruncateFree(OpTy, Ty))
+ return TargetTransformInfo::TCC_Free;
+ return TargetTransformInfo::TCC_Basic;
+ }
+ case Instruction::ZExt: {
+ if (TLI->isZExtFree(OpTy, Ty))
+ return TargetTransformInfo::TCC_Free;
+ return TargetTransformInfo::TCC_Basic;
+ }
+ }
+
+ return BaseT::getOperationCost(Opcode, Ty, OpTy);
+ }
+
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index 6873d7da788..69f6c69059d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -133,6 +133,195 @@ cond.end: ; preds = %entry, %cond.true
ret i16 %cond
}
+; The following tests verify that calls to cttz/ctlz are speculated even if
+; basic block %cond.true has an extra zero extend/truncate which is "free"
+; for the target.
+
+define i64 @test1e(i32 %x) {
+; ALL-LABEL: @test1e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTTZ]] to i64
+; BMI-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %phitmp2 = zext i32 %0 to i64
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+ ret i64 %cond
+}
+
+define i32 @test2e(i64 %x) {
+; ALL-LABEL: @test2e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i32
+; BMI-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i32 %cond
+}
+
+define i64 @test3e(i32 %x) {
+; ALL-LABEL: @test3e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTLZ]] to i64
+; LZCNT-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %phitmp2 = zext i32 %0 to i64
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+ ret i64 %cond
+}
+
+define i32 @test4e(i64 %x) {
+; ALL-LABEL: @test4e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i32
+; LZCNT-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i32 %cond
+}
+
+define i16 @test5e(i64 %x) {
+; ALL-LABEL: @test5e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i16
+; LZCNT-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test6e(i32 %x) {
+; ALL-LABEL: @test6e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTLZ]] to i16
+; LZCNT-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test7e(i64 %x) {
+; ALL-LABEL: @test7e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i16
+; BMI-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i64 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+ ret i16 %cond
+}
+
+define i16 @test8e(i32 %x) {
+; ALL-LABEL: @test8e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTTZ]] to i16
+; BMI-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %0 to i16
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+ ret i16 %cond
+}
+
+
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
OpenPOWER on IntegriCloud