diff options
 llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 25 ++++++++++++++++++-------
 llvm/test/Analysis/CostModel/X86/costmodel.ll        |  6 ++++++
2 files changed, 24 insertions, 7 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 63b47ffa3a5..0f66dc46dc3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -785,16 +785,27 @@ public:
     if (getUserCost(I, Operands) == TTI::TCC_Free)
       return 0;
 
-    if (isa<CallInst>(I))
-      return 40;
-
     if (isa<LoadInst>(I))
       return 4;
 
-    Type *dstTy = I->getType();
-    if (VectorType *VectorTy = dyn_cast<VectorType>(dstTy))
-      dstTy = VectorTy->getElementType();
-    if (dstTy->isFloatingPointTy())
+    Type *DstTy = I->getType();
+
+    // Usually an intrinsic is a simple instruction.
+    // A real function call is much slower.
+    if (auto *CI = dyn_cast<CallInst>(I)) {
+      const Function *F = CI->getCalledFunction();
+      if (static_cast<T *>(this)->isLoweredToCall(F))
+        return 40;
+      // Some intrinsics return a value and a flag, we use the value type
+      // to decide its latency.
+      if (StructType* StructTy = dyn_cast<StructType>(DstTy))
+        DstTy = StructTy->getElementType(0);
+      // Fall through to simple instructions.
+    }
+
+    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
+      DstTy = VectorTy->getElementType();
+    if (DstTy->isFloatingPointTy())
       return 3;
 
     return 1;
diff --git a/llvm/test/Analysis/CostModel/X86/costmodel.ll b/llvm/test/Analysis/CostModel/X86/costmodel.ll
index d16cc27cb16..19e7128ff44 100644
--- a/llvm/test/Analysis/CostModel/X86/costmodel.ll
+++ b/llvm/test/Analysis/CostModel/X86/costmodel.ll
@@ -5,6 +5,8 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
 define i64 @foo(i64 %arg) {
 
   ; LATENCY: cost of 0 {{.*}} alloca i32
@@ -39,6 +41,10 @@ define i64 @foo(i64 %arg) {
   ; CODESIZE: cost of 0 {{.*}} trunc
   %TC = trunc i64 undef to i32
 
+  ; LATENCY: cost of 1 {{.*}} call
+  ; CODESIZE: cost of 1 {{.*}} call
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+
   ; LATENCY: cost of 1 {{.*}} ret
   ; CODESIZE: cost of 1 {{.*}} ret
   ret i64 undef