7 files changed, 58 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4aafc2e2cad..cfdfa00e805 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -291,76 +291,58 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   return visitAllocSite(AI);
 }
 
+/// \brief Combine loads to match the type of value their uses after looking
+/// through intervening bitcasts.
+///
+/// The core idea here is that if the result of a load is used in an operation,
+/// we should load the type most conducive to that operation. For example, when
+/// loading an integer and converting that immediately to a pointer, we should
+/// instead directly load a pointer.
+///
+/// However, this routine must never change the width of a load or the number of
+/// loads as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows loads to more closely model the types
+/// of their consuming operations.
+///
+/// Currently, we also refuse to change the precise type used for an atomic load
+/// or a volatile load. This is debatable, and might be reasonable to change
+/// later. However, it is risky in case some backend or other part of LLVM is
+/// relying on the exact type loaded to select appropriate atomic operations.
+static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
+  // FIXME: We could probably with some care handle both volatile and atomic
+  // loads here but it isn't clear that this is important.
+  if (!LI.isSimple())
+    return nullptr;
 
-/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
-                                        const DataLayout *DL) {
-  User *CI = cast<User>(LI.getOperand(0));
-  Value *CastOp = CI->getOperand(0);
-
-  PointerType *DestTy = cast<PointerType>(CI->getType());
-  Type *DestPTy = DestTy->getElementType();
-  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
-
-    // If the address spaces don't match, don't eliminate the cast.
-    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
-      return nullptr;
-
-    Type *SrcPTy = SrcTy->getElementType();
-
-    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
-        DestPTy->isVectorTy()) {
-      // If the source is an array, the code below will not succeed.  Check to
-      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
-      // constants.
-      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
-        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
-          if (ASrcTy->getNumElements() != 0) {
-            Type *IdxTy = DL
-                        ? DL->getIntPtrType(SrcTy)
-                        : Type::getInt64Ty(SrcTy->getContext());
-            Value *Idx = Constant::getNullValue(IdxTy);
-            Value *Idxs[2] = { Idx, Idx };
-            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
-            SrcTy = cast<PointerType>(CastOp->getType());
-            SrcPTy = SrcTy->getElementType();
-          }
-
-      if (IC.getDataLayout() &&
-          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
-           SrcPTy->isVectorTy()) &&
-          // Do not allow turning this into a load of an integer, which is then
-          // casted to a pointer, this pessimizes pointer analysis a lot.
-          (SrcPTy->isPtrOrPtrVectorTy() ==
-           LI.getType()->isPtrOrPtrVectorTy()) &&
-          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
-              IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
-
-        // Okay, we are casting from one integer or pointer type to another of
-        // the same size.  Instead of casting the pointer before the load, cast
-        // the result of the loaded value.
-        LoadInst *NewLoad =
-          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
-        NewLoad->setAlignment(LI.getAlignment());
-        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
-        // Now cast the result of the load.
-        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
-        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
-        if (OldTy && NewTy &&
-            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
-          return new AddrSpaceCastInst(NewLoad, LI.getType());
-        }
+  if (LI.use_empty())
+    return nullptr;
 
-        return new BitCastInst(NewLoad, LI.getType());
-      }
+  Value *Ptr = LI.getPointerOperand();
+  unsigned AS = LI.getPointerAddressSpace();
+
+  // Fold away bit casts of the loaded value by loading the desired type.
+  if (LI.hasOneUse())
+    if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
+      LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
+          IC.Builder->CreateBitCast(Ptr, BC->getDestTy()->getPointerTo(AS)),
+          LI.getAlignment(), LI.getName());
+      BC->replaceAllUsesWith(NewLoad);
+      IC.EraseInstFromFunction(*BC);
+      return &LI;
     }
-  }
+
+  // FIXME: We should also canonicalize loads of vectors when their elements are
+  // cast to other types.
   return nullptr;
 }
 
 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
+  // Try to canonicalize the loaded type.
+  if (Instruction *Res = combineLoadToOperationType(*this, LI))
+    return Res;
+
   // Attempt to improve the alignment.
   if (DL) {
     unsigned KnownAlign =
@@ -376,11 +358,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     LI.setAlignment(EffectiveLoadAlign);
   }
 
-  // load (cast X) --> cast (load X) iff safe.
-  if (isa<CastInst>(Op))
-    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
-      return Res;
-
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return nullptr;
@@ -419,12 +396,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
   }
 
-  // Instcombine load (constantexpr_cast global) -> cast (load global)
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
-    if (CE->isCast())
-      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
-        return Res;
-
   if (Op->hasOneUse()) {
     // Change select and PHI nodes to select values instead of addresses: this
     // helps alias analysis out a lot, allows many others simplifications, and
diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll
index ccee87433f3..98cecefcc29 100644
--- a/llvm/test/Transforms/InstCombine/atomic.ll
+++ b/llvm/test/Transforms/InstCombine/atomic.ll
@@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; Check transforms involving atomic operations
 
-define i32* @test1(i8** %p) {
-; CHECK-LABEL: define i32* @test1(
-; CHECK: load atomic i8** %p monotonic, align 8
-  %c = bitcast i8** %p to i32**
-  %r = load atomic i32** %c monotonic, align 8
-  ret i32* %r
-}
-
 define i32 @test2(i32* %p) {
 ; CHECK-LABEL: define i32 @test2(
 ; CHECK: %x = load atomic i32* %p seq_cst, align 4
diff --git a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
index a6b56f94ffb..bc36b25b6de 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -90,7 +90,8 @@ entry:
 define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar
-; CHECK: bitcast float %tmp to i32
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast i32 %call to float
@@ -104,7 +105,8 @@ entry:
 define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector
-; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast <2 x i32> %call to <2 x float>
@@ -118,7 +120,8 @@ entry:
 define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
-; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64*
 ; CHECK: %call = call i64 @func_i64
 ; CHECK: bitcast i64 %call to <2 x float>
   %tmp = load <2 x float>* %source, align 8
@@ -130,7 +133,8 @@ entry:
 define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
-; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>*
 ; CHECK: call <2 x float> @func_v2f32
 ; CHECK: bitcast <2 x float> %call to i64
   %tmp = load i64* %source, align 8
@@ -142,7 +146,8 @@ entry:
 define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
-; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>*
 ; CHECK: call <2 x i32*> @func_v2i32p
 ; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
   %tmp = load <2 x i64*>* %source, align 8
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 7fac78a40f5..bb61f02c8e2 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() {
   ret i32 %a
 }
 
-define <4 x i32> @constant_fold_bitcast_vector_as() {
+define <4 x float> @constant_fold_bitcast_vector_as() {
 ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
 ; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-; CHECK: bitcast <4 x float> %1 to <4 x i32>
-  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
-  ret <4 x i32> %a
+  %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+  ret <4 x float> %a
 }
 
 @i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/descale-zero.ll b/llvm/test/Transforms/InstCombine/descale-zero.ll
index 7990fdb3eca..4656837cc05 100644
--- a/llvm/test/Transforms/InstCombine/descale-zero.ll
+++ b/llvm/test/Transforms/InstCombine/descale-zero.ll
@@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 define internal i8* @descale_zero() {
 entry:
-; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
-; CHECK-NEXT: bitcast i16*
+; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
 ; CHECK-NEXT: ret i8*
   %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
   %num = load i64* inttoptr (i64 64 to i64*), align 64
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index b7daccc71af..bb4666219a7 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -703,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
 
 ; CHECK-LABEL: @test39(
 ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
-; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8
 }
 
 define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
diff --git a/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll b/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll
deleted file mode 100644
index fd6339cc926..00000000000
--- a/llvm/test/Transforms/InstCombine/load-addrspace-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-n8:16:32:64"
-
-define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
-; CHECK-LABEL: @pointer_to_addrspace_pointer(
-; CHECK: load
-; CHECK: addrspacecast
-  %y = bitcast i32 addrspace(1)** %x to i32**
-  %z = load i32** %y
-  ret i32* %z
-}
-
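For illustration, a minimal sketch of the canonicalization performed by the new combineLoadToOperationType routine, written in the typed-pointer IR syntax used by the tests above (the function and value names %p, %v, %f are hypothetical, not taken from the patch): a load whose only use is a bitcast of the loaded value is rewritten to bitcast the pointer operand instead and load the use's type directly, leaving the load's width and alignment unchanged.

; Before: the load's single use is a bitcast of the loaded value.
define float @load_then_bitcast(i32* %p) {
entry:
  %v = load i32* %p, align 4
  %f = bitcast i32 %v to float
  ret float %f
}

; After: the pointer operand is bitcast and the use's type is loaded directly.
define float @bitcast_then_load(i32* %p) {
entry:
  %c = bitcast i32* %p to float*
  %f = load float* %c, align 4
  ret float %f
}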

