diff options
7 files changed, 58 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4aafc2e2cad..cfdfa00e805 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -291,76 +291,58 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { return visitAllocSite(AI); } +/// \brief Combine loads to match the type of value their uses after looking +/// through intervening bitcasts. +/// +/// The core idea here is that if the result of a load is used in an operation, +/// we should load the type most conducive to that operation. For example, when +/// loading an integer and converting that immediately to a pointer, we should +/// instead directly load a pointer. +/// +/// However, this routine must never change the width of a load or the number of +/// loads as that would introduce a semantic change. This combine is expected to +/// be a semantic no-op which just allows loads to more closely model the types +/// of their consuming operations. +/// +/// Currently, we also refuse to change the precise type used for an atomic load +/// or a volatile load. This is debatable, and might be reasonable to change +/// later. However, it is risky in case some backend or other part of LLVM is +/// relying on the exact type loaded to select appropriate atomic operations. +static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { + // FIXME: We could probably with some care handle both volatile and atomic + // loads here but it isn't clear that this is important. + if (!LI.isSimple()) + return nullptr; -/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. -static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, - const DataLayout *DL) { - User *CI = cast<User>(LI.getOperand(0)); - Value *CastOp = CI->getOperand(0); - - PointerType *DestTy = cast<PointerType>(CI->getType()); - Type *DestPTy = DestTy->getElementType(); - if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { - - // If the address spaces don't match, don't eliminate the cast. - if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return nullptr; - - Type *SrcPTy = SrcTy->getElementType(); - - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || - DestPTy->isVectorTy()) { - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) - if (Constant *CSrc = dyn_cast<Constant>(CastOp)) - if (ASrcTy->getNumElements() != 0) { - Type *IdxTy = DL - ? DL->getIntPtrType(SrcTy) - : Type::getInt64Ty(SrcTy->getContext()); - Value *Idx = Constant::getNullValue(IdxTy); - Value *Idxs[2] = { Idx, Idx }; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); - SrcTy = cast<PointerType>(CastOp->getType()); - SrcPTy = SrcTy->getElementType(); - } - - if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || - SrcPTy->isVectorTy()) && - // Do not allow turning this into a load of an integer, which is then - // casted to a pointer, this pessimizes pointer analysis a lot. - (SrcPTy->isPtrOrPtrVectorTy() == - LI.getType()->isPtrOrPtrVectorTy()) && - IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == - IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before the load, cast - // the result of the loaded value. - LoadInst *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); - NewLoad->setAlignment(LI.getAlignment()); - NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); - // Now cast the result of the load. - PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType()); - PointerType *NewTy = dyn_cast<PointerType>(LI.getType()); - if (OldTy && NewTy && - OldTy->getAddressSpace() != NewTy->getAddressSpace()) { - return new AddrSpaceCastInst(NewLoad, LI.getType()); - } + if (LI.use_empty()) + return nullptr; - return new BitCastInst(NewLoad, LI.getType()); - } + Value *Ptr = LI.getPointerOperand(); + unsigned AS = LI.getPointerAddressSpace(); + + // Fold away bit casts of the loaded value by loading the desired type. + if (LI.hasOneUse()) + if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) { + LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( + IC.Builder->CreateBitCast(Ptr, BC->getDestTy()->getPointerTo(AS)), + LI.getAlignment(), LI.getName()); + BC->replaceAllUsesWith(NewLoad); + IC.EraseInstFromFunction(*BC); + return &LI; } - } + + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + // Try to canonicalize the loaded type. + if (Instruction *Res = combineLoadToOperationType(*this, LI)) + return Res; + // Attempt to improve the alignment. if (DL) { unsigned KnownAlign = @@ -376,11 +358,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { LI.setAlignment(EffectiveLoadAlign); } - // load (cast X) --> cast (load X) iff safe. - if (isa<CastInst>(Op)) - if (Instruction *Res = InstCombineLoadCast(*this, LI, DL)) - return Res; - // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return nullptr; @@ -419,12 +396,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - // Instcombine load (constantexpr_cast global) -> cast (load global) - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) - if (CE->isCast()) - if (Instruction *Res = InstCombineLoadCast(*this, LI, DL)) - return Res; - if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll index ccee87433f3..98cecefcc29 100644 --- a/llvm/test/Transforms/InstCombine/atomic.ll +++ b/llvm/test/Transforms/InstCombine/atomic.ll @@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0" ; Check transforms involving atomic operations -define i32* @test1(i8** %p) { -; CHECK-LABEL: define i32* @test1( -; CHECK: load atomic i8** %p monotonic, align 8 - %c = bitcast i8** %p to i32** - %r = load atomic i32** %c monotonic, align 8 - ret i32* %r -} - define i32 @test2(i32* %p) { ; CHECK-LABEL: define i32 @test2( ; CHECK: %x = load atomic i32* %p seq_cst, align 4 diff --git a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll index a6b56f94ffb..bc36b25b6de 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll @@ -90,7 +90,8 @@ entry: define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind { entry: ; CHECK-LABEL: @bitcast_alias_scalar -; CHECK: bitcast float %tmp to i32 +; CHECK: bitcast float* %source to i32* +; CHECK: load i32* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp ; CHECK: bitcast i32 %call to float @@ -104,7 +105,8 @@ entry: define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind { entry: ; CHECK-LABEL: @bitcast_alias_vector -; CHECK: bitcast <2 x float> %tmp to <2 x i32> +; CHECK: bitcast <2 x float>* %source to <2 x i32>* +; CHECK: load <2 x i32>* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp ; CHECK: bitcast <2 x i32> %call to <2 x float> @@ -118,7 +120,8 @@ entry: define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind { entry: ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size -; CHECK: bitcast <2 x float> %tmp to i64 +; CHECK: bitcast <2 x float>* %source to i64* +; CHECK: load i64* ; CHECK: %call = call i64 @func_i64 ; CHECK: bitcast i64 %call to <2 x float> %tmp = load <2 x float>* %source, align 8 @@ -130,7 +133,8 @@ entry: define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind { entry: ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size -; CHECK: bitcast i64 %tmp to <2 x float> +; CHECK: bitcast i64* %source to <2 x float>* +; CHECK: load <2 x float>* ; CHECK: call <2 x float> @func_v2f32 ; CHECK: bitcast <2 x float> %call to i64 %tmp = load i64* %source, align 8 @@ -142,7 +146,8 @@ entry: define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind { entry: ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size -; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*> +; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>* +; CHECK: load <2 x i32*>* ; CHECK: call <2 x i32*> @func_v2i32p ; CHECK: bitcast <2 x i32*> %call to <2 x i64*> %tmp = load <2 x i64*>* %source, align 8 diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll index 7fac78a40f5..bb61f02c8e2 100644 --- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll +++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll @@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() { ret i32 %a } -define <4 x i32> @constant_fold_bitcast_vector_as() { +define <4 x float> @constant_fold_bitcast_vector_as() { ; CHECK-LABEL: @constant_fold_bitcast_vector_as( ; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16 -; CHECK: bitcast <4 x float> %1 to <4 x i32> - %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4 - ret <4 x i32> %a + %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4 + ret <4 x float> %a } @i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/descale-zero.ll b/llvm/test/Transforms/InstCombine/descale-zero.ll index 7990fdb3eca..4656837cc05 100644 --- a/llvm/test/Transforms/InstCombine/descale-zero.ll +++ b/llvm/test/Transforms/InstCombine/descale-zero.ll @@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0" define internal i8* @descale_zero() { entry: -; CHECK: load i16** inttoptr (i64 48 to i16**), align 16 -; CHECK-NEXT: bitcast i16* +; CHECK: load i8** inttoptr (i64 48 to i8**), align 16 ; CHECK-NEXT: ret i8* %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16 %num = load i64* inttoptr (i64 64 to i64*), align 64 diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index b7daccc71af..bb4666219a7 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -703,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind { ; CHECK-LABEL: @test39( ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2 -; CHECK: getelementptr inbounds i8* %tmp3, i64 -8 +; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8 } define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) { diff --git a/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll b/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll deleted file mode 100644 index fd6339cc926..00000000000 --- a/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt -instcombine -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-n8:16:32:64" - -define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind { -; CHECK-LABEL: @pointer_to_addrspace_pointer( -; CHECK: load -; CHECK: addrspacecast - %y = bitcast i32 addrspace(1)** %x to i32** - %z = load i32** %y - ret i32* %z -} - |