diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 25 |
-rw-r--r-- | llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll | 67 |
2 files changed, 92 insertions, 0 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index a2b28943708..19e4448d199 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -939,6 +939,15 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { return false; } +static bool isBswapIntrinsicCall(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + if (auto *CI = dyn_cast<CallInst>(I)) + if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::bswap) + return true; + return false; +} + int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { @@ -975,6 +984,22 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned NumOps = (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src)); + // Store/Load reversed saves one instruction. + if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) { + if (Opcode == Instruction::Load && I->hasOneUse()) { + const Instruction *LdUser = cast<Instruction>(*I->user_begin()); + // In case of load -> bswap -> store, return normal cost for the load. + if (isBswapIntrinsicCall(LdUser) && + (!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin()))) + return 0; + } + else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + const Value *StoredVal = SI->getValueOperand(); + if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal)) + return 0; + } + } + if (Src->getScalarSizeInBits() == 128) // 128 bit scalars are held in a pair of two 64 bit registers. 
NumOps *= 2; diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll index 52d5b654cfd..f83cf5a7c3d 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll @@ -40,6 +40,73 @@ define void @bswap_i16(i16 %arg, <2 x i16> %arg2, <4 x i16> %arg4, ret void } +; Test that store/load reversed is reflected in costs. +define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64_mem': +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, i64* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, i64* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, i64* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, i64* %dst + %Ld1 = load i64, i64* %src + %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1) + + %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg) + store i64 %swp2, i64* %dst + + %Ld2 = load i64, i64* %src + %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2) + store i64 %swp3, i64* %dst + + ret void +} + +define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem': +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 
for instruction: %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, i32* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, i32* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, i32* %dst + %Ld1 = load i32, i32* %src + %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1) + + %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg) + store i32 %swp2, i32* %dst + + %Ld2 = load i32, i32* %src + %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2) + store i32 %swp3, i32* %dst + + ret void +} + +define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem': +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, i16* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, i16* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2) +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, i16* %dst + %Ld1 = load i16, i16* %src + %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1) + + %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg) + store i16 %swp2, i16* %dst + + %Ld2 = load i16, i16* %src + %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2) + store i16 %swp3, i16* %dst + + ret void +} + declare i64 @llvm.bswap.i64(i64) declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) |