summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSam Parker <sam.parker@arm.com>2018-08-10 13:57:13 +0000
committerSam Parker <sam.parker@arm.com>2018-08-10 13:57:13 +0000
commit8c4b964c5ac430455c1c817831056c3ace90d92c (patch)
treec76153ff41f1f0e726cc889613a5c93cd8440b56
parent53ac1776f3be82a97f94591cc08c1864f5e24f4b (diff)
downloadbcm5719-llvm-8c4b964c5ac430455c1c817831056c3ace90d92c.tar.gz
bcm5719-llvm-8c4b964c5ac430455c1c817831056c3ace90d92c.zip
[ARM] Disallow zexts in ARMCodeGenPrepare
Enabling ARMCodeGenPrepare by default caused a whole load of failures. This is due to zexts and truncs not being handled properly. ZExts are messy so it's just easier to disable for now and truncs are allowed only as 'sinks'. I still need to figure out why allowing them as 'sources' causes so many failures. The other main changes are that we are explicit in the types that we are converting to, it's now always 'TypeSize'. Type support is also now performed while checking for valid opcodes as it unnecessarily complicated having the checks at different stages. I've moved the tests around too, so we have the zext and truncs in their own file as well as the overflowing opcode tests. Differential Revision: https://reviews.llvm.org/D50518 llvm-svn: 339432
-rw-r--r--llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp274
-rw-r--r--llvm/test/CodeGen/ARM/arm-cgp-icmps.ll210
-rw-r--r--llvm/test/CodeGen/ARM/arm-cgp-overflow.ll49
-rw-r--r--llvm/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll73
-rw-r--r--llvm/test/CodeGen/ARM/arm-cgp-zext-truncs.ll292
5 files changed, 452 insertions, 446 deletions
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427..d0ff4441371 100644
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -85,16 +85,15 @@ class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
- bool isNarrowInstSupported(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -126,65 +125,66 @@ static bool isSigned(Value *V) {
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n");
+ Type *Ty = V->getType();
+ if (Ty->isVoidTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
+ if (!IntTy) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, not an integer.\n");
return false;
+ }
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a leaf in the use-def chain, producing
+/// a narrow (i8, i16) value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ // TODO Allow truncs and zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// the IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
+ // TODO The truncate also isn't actually necessary because we would already
+ // proved that the data value is kept within the range of the original data
+ // type.
auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
};
if (auto *Store = dyn_cast<StoreInst>(V))
return UsesNarrowValue(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
return UsesNarrowValue(Return->getReturnValue());
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return UsesNarrowValue(Trunc->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
/// it's operands or result.
static bool isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
@@ -222,19 +222,18 @@ static bool isSafeOverflow(Instruction *I) {
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -262,7 +261,7 @@ static bool isPromotedResultSafe(Value *V) {
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,10 +269,10 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
@@ -285,10 +284,9 @@ void IRPromoter::Mutate(Type *OrigTy,
SmallPtrSetImpl<Instruction*> &Roots) {
IRBuilder<> Builder{Ctx};
Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
@@ -325,7 +323,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
<< *I << "\n");
Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *Args[] = { I->getOperand(0), I->getOperand(1) };
@@ -353,9 +351,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
for (auto V : Leaves) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
@@ -401,17 +397,9 @@ void IRPromoter::Mutate(Type *OrigTy,
for (auto *V : Visited) {
if (Leaves.count(V))
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (!isa<Instruction>(V))
continue;
- }
if (!shouldPromote(V) || isPromotedResultSafe(V))
continue;
@@ -459,30 +447,6 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
-
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
-
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
-
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
- }
- }
- return true;
-}
-
/// We accept most instructions, as well as Arguments and ConstantInsts. We
/// Disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
@@ -490,42 +454,42 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isEquality() || !ICmp->isSigned();
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if (isa<ConstantInt>(V) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ // Currently, Trunc is the only cast we support.
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return isSupportedType(Trunc->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
// still be roots.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V)) {
LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
return false;
}
+ if (!isSupportedType(V))
+ return false;
bool res = !isSigned(V);
if (!res)
@@ -537,39 +501,49 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
+ if (isPromotedResultSafe(V))
+ return true;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
- }
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
- if (VSize > TypeSize)
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
return false;
- if (isPromotedResultSafe(V))
- return true;
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16)
+ return false;
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Leaves;
@@ -584,6 +558,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore pointer value that aren't instructions.
+ if (!isa<Instruction>(V) && isa<PointerType>(V->getType()))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -638,41 +616,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
- for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
- continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
- continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
- }
-
LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
);
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
- return false;
Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
return true;
@@ -712,12 +659,8 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
@@ -744,6 +687,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();
diff --git a/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll b/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll
index 18df13f732e..f3a575983cf 100644
--- a/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll
+++ b/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll
@@ -158,39 +158,6 @@ entry:
ret i32 %res
}
-; CHECK-COMMON-LABEL: dsp_imm2
-; CHECK-COMMON: add r0, r1
-; CHECK-DSP-NEXT: ldrh r1, [r3]
-; CHECK-DSP-NEXT: ldrh r2, [r2]
-; CHECK-DSP-NEXT: subs r1, r1, r0
-; CHECK-DSP-NEXT: add r0, r2
-; CHECK-DSP-NEXT: uxth r3, r1
-; CHECK-DSP-NEXT: uxth r2, r0
-; CHECK-DSP-NEXT: cmp r2, r3
-
-; CHECK-DSP-IMM: movs r1, #0
-; CHECK-DSP-IMM-NEXT: uxth r0, r0
-; CHECK-DSP-IMM-NEXT: usub16 r1, r1, r0
-; CHECK-DSP-IMM-NEXT: ldrh r0, [r2]
-; CHECK-DSP-IMM-NEXT: ldrh r3, [r3]
-; CHECK-DSP-IMM-NEXT: usub16 r0, r0, r1
-; CHECK-DSP-IMM-NEXT: uadd16 r1, r3, r1
-; CHECK-DSP-IMM-NEXT: cmp r0, r1
-
-define i16 @dsp_imm2(i32 %arg0, i32 %arg1, i16* %gep0, i16* %gep1) {
-entry:
- %add0 = add i32 %arg0, %arg1
- %conv0 = trunc i32 %add0 to i16
- %sub0 = sub i16 0, %conv0
- %load0 = load i16, i16* %gep0, align 2
- %load1 = load i16, i16* %gep1, align 2
- %sub1 = sub i16 %load0, %sub0
- %add1 = add i16 %load1, %sub0
- %cmp = icmp ult i16 %sub1, %add1
- %res = select i1 %cmp, i16 %add1, i16 %sub1
- ret i16 %res
-}
-
; CHECK-COMMON-LABEL: dsp_var:
; CHECK-COMMON: eors r1, r0
; CHECK-COMMON: and r2, r0, #7
@@ -267,109 +234,6 @@ entry:
ret i32 %res
}
-; CHECK-COMMON-LABEL: icmp_i32_zext:
-; CHECK-COMMON: ldrb [[LD:r[^ ]+]], [r0]
-; CHECK-COMMON: subs [[SUB:r[^ ]+]], [[LD]], #1
-; CHECK-COMMON-NOT: uxt
-; CHECK-COMMON: cmp [[LD]], [[SUB]]
-; CHECK-COMMON-NOT: uxt
-define i8 @icmp_i32_zext(i8* %ptr) {
-entry:
- %gep = getelementptr inbounds i8, i8* %ptr, i32 0
- %0 = load i8, i8* %gep, align 1
- %1 = sub nuw nsw i8 %0, 1
- %conv44 = zext i8 %0 to i32
- br label %preheader
-
-preheader:
- br label %body
-
-body:
- %2 = phi i8 [ %1, %preheader ], [ %3, %if.end ]
- %si.0274 = phi i32 [ %conv44, %preheader ], [ %inc, %if.end ]
- %conv51266 = zext i8 %2 to i32
- %cmp52267 = icmp eq i32 %si.0274, %conv51266
- br i1 %cmp52267, label %if.end, label %exit
-
-if.end:
- %inc = add i32 %si.0274, 1
- %gep1 = getelementptr inbounds i8, i8* %ptr, i32 %inc
- %3 = load i8, i8* %gep1, align 1
- br label %body
-
-exit:
- ret i8 %2
-}
-
-@d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1
-@sh1 = hidden local_unnamed_addr global i16 0, align 2
-@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2
-
-; CHECK-COMMON-LABEL: icmp_sext_zext_store_i8_i16
-; CHECK-NODSP: ldrb [[BYTE:r[^ ]+]],
-; CHECK-NODSP: strh [[BYTE]],
-; CHECK-NODSP: ldrsh.w
-define i32 @icmp_sext_zext_store_i8_i16() {
-entry:
- %0 = load i8, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @d_uch, i32 0, i32 2), align 1
- %conv = zext i8 %0 to i16
- store i16 %conv, i16* @sh1, align 2
- %conv1 = zext i8 %0 to i32
- %1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @d_sh, i32 0, i32 2), align 2
- %conv2 = sext i16 %1 to i32
- %cmp = icmp eq i32 %conv1, %conv2
- %conv3 = zext i1 %cmp to i32
- ret i32 %conv3
-}
-
-; CHECK-COMMON-LABEL: or_icmp_ugt:
-; CHECK-COMMON: ldrb [[LD:r[^ ]+]], [r1]
-; CHECK-COMMON: subs [[SUB:r[^ ]+]], #1
-; CHECK-COMMON-NOT: uxtb
-; CHECK-COMMON: cmp [[SUB]], #3
-define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) {
-entry:
- %0 = load i8, i8* %ptr
- %1 = zext i8 %0 to i32
- %mul = shl nuw nsw i32 %1, 1
- %add0 = add nuw nsw i32 %mul, 6
- %cmp0 = icmp ne i32 %arg, %add0
- %add1 = add i8 %0, -1
- %cmp1 = icmp ugt i8 %add1, 3
- %or = or i1 %cmp0, %cmp1
- ret i1 %or
-}
-
-; CHECK-COMMON-LABEL: icmp_switch_trunc:
-; CHECK-COMMON-NOT: uxt
-define i16 @icmp_switch_trunc(i16 zeroext %arg) {
-entry:
- %conv = add nuw i16 %arg, 15
- %mul = mul nuw nsw i16 %conv, 3
- %trunc = trunc i16 %arg to i3
- switch i3 %trunc, label %default [
- i3 0, label %sw.bb
- i3 1, label %sw.bb.i
- ]
-
-sw.bb:
- %cmp0 = icmp ult i16 %mul, 127
- %select = select i1 %cmp0, i16 %mul, i16 127
- br label %exit
-
-sw.bb.i:
- %cmp1 = icmp ugt i16 %mul, 34
- %select.i = select i1 %cmp1, i16 %mul, i16 34
- br label %exit
-
-default:
- br label %exit
-
-exit:
- %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
- ret i16 %res
-}
-
; CHECK-COMMON-LABEL: icmp_eq_minus_one
; CHECK-COMMON: cmp r0, #255
define i32 @icmp_eq_minus_one(i8* %ptr) {
@@ -392,77 +256,3 @@ define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) {
ret i32 %res
}
-; CHECK-COMMON-LABEL: mul_wrap
-; CHECK-COMMON: mul
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define i16 @mul_wrap(i16 %arg0, i16 %arg1) {
- %mul = mul i16 %arg0, %arg1
- %cmp = icmp eq i16 %mul, 1
- %res = select i1 %cmp, i16 %arg0, i16 47
- ret i16 %res
-}
-
-; CHECK-COMMON-LABEL: shl_wrap
-; CHECK-COMMON: lsl
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define i16 @shl_wrap(i16 %arg0) {
- %mul = shl i16 %arg0, 4
- %cmp = icmp eq i16 %mul, 1
- %res = select i1 %cmp, i16 %arg0, i16 47
- ret i16 %res
-}
-
-; CHECK-COMMON-LABEL: add_wrap
-; CHECK-COMMON: add
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define i16 @add_wrap(i16 %arg0, i16 %arg1) {
- %add = add i16 %arg0, 128
- %cmp = icmp eq i16 %add, %arg1
- %res = select i1 %cmp, i16 %arg0, i16 1
- ret i16 %res
-}
-
-; CHECK-COMMON-LABEL: sub_wrap
-; CHECK-COMMON: sub
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define i16 @sub_wrap(i16 %arg0, i16 %arg1, i16 %arg2) {
- %sub = sub i16 %arg0, %arg2
- %cmp = icmp eq i16 %sub, %arg1
- %res = select i1 %cmp, i16 %arg0, i16 1
- ret i16 %res
-}
-
-; CHECK-COMMON-LABEL: urem_trunc_icmps
-; CHECK-COMMON-NOT: uxt
-define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
-entry:
- %ptr = load i16*, i16** %in, align 4
- %ld = load i16, i16* %ptr, align 2
- %cmp.i = icmp eq i16 %ld, 0
- br i1 %cmp.i, label %exit, label %cond.false.i
-
-cond.false.i:
- %rem = urem i16 5, %ld
- %extract.t = trunc i16 %rem to i8
- br label %body
-
-body:
- %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
- %cmp = icmp ugt i8 %cond.in.i.off0, 7
- %conv5 = zext i1 %cmp to i32
- store i32 %conv5, i32* %g, align 4
- %.pr = load i32, i32* %k, align 4
- %tobool13150 = icmp eq i32 %.pr, 0
- br i1 %tobool13150, label %for.inc, label %exit
-
-for.inc:
- %add = add nuw i8 %cond.in.i.off0, 1
- br label %body
-
-exit:
- ret void
-}
diff --git a/llvm/test/CodeGen/ARM/arm-cgp-overflow.ll b/llvm/test/CodeGen/ARM/arm-cgp-overflow.ll
new file mode 100644
index 00000000000..ea19ac690ca
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/arm-cgp-overflow.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s
+
+; CHECK: overflow_add
+; CHECK: add
+; CHECK: uxth
+; CHECK: cmp
+define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) {
+ %add = add i16 %a, %b
+ %or = or i16 %add, 1
+ %cmp = icmp ugt i16 %or, 1024
+ %res = select i1 %cmp, i16 2, i16 5
+ ret i16 %res
+}
+
+; CHECK-LABEL: overflow_sub
+; CHECK: sub
+; CHECK: uxth
+; CHECK: cmp
+define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) {
+ %add = sub i16 %a, %b
+ %or = or i16 %add, 1
+ %cmp = icmp ugt i16 %or, 1024
+ %res = select i1 %cmp, i16 2, i16 5
+ ret i16 %res
+}
+
+; CHECK-LABEL: overflow_mul
+; CHECK: mul
+; CHECK: uxth
+; CHECK: cmp
+define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) {
+ %add = mul i16 %a, %b
+ %or = or i16 %add, 1
+ %cmp = icmp ugt i16 %or, 1024
+ %res = select i1 %cmp, i16 2, i16 5
+ ret i16 %res
+}
+
+; CHECK-LABEL: overflow_shl
+; CHECK-COMMON: lsl
+; CHECK-COMMON: uxth
+; CHECK-COMMON: cmp
+define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) {
+ %add = shl i16 %a, %b
+ %or = or i16 %add, 1
+ %cmp = icmp ugt i16 %or, 1024
+ %res = select i1 %cmp, i16 2, i16 5
+ ret i16 %res
+}
diff --git a/llvm/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll b/llvm/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll
index 8587a907616..b3720c7841d 100644
--- a/llvm/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll
+++ b/llvm/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll
@@ -116,48 +116,6 @@ exit:
ret void
}
-; CHECK-COMMON-LABEL: phi_feeding_switch
-; CHECK-COMMON: ldrb
-; CHECK-COMMON: uxtb
-; CHECK-COMMON-NOT: uxt
-define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) {
-entry:
- %pre = load i8, i8* %memblock, align 1
- %conv = trunc i16 %arg to i8
- br label %header
-
-header:
- %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
- %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
- %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
- switch i8 %phi.0, label %default [
- i8 43, label %for.inc.i
- i8 45, label %for.inc.i.i
- ]
-
-for.inc.i:
- %xor = xor i8 %phi.1, 1
- br label %latch
-
-for.inc.i.i:
- %and = and i8 %phi.1, 3
- br label %latch
-
-default:
- %sub = sub i8 %phi.0, 1
- %cmp2 = icmp ugt i8 %sub, 4
- br i1 %cmp2, label %latch, label %exit
-
-latch:
- %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
- %count = add nuw i8 %phi.2, 1
- store i8 %count, i8* %store, align 1
- br label %header
-
-exit:
- ret void
-}
-
; CHECK-COMMON-LABEL: ret_i8
; CHECK-COMMON-NOT: uxt
define i8 @ret_i8() {
@@ -186,33 +144,6 @@ exit:
ret i8 %inc2
}
-; Check that %exp requires uxth in all cases, and will also be required to
-; promote %1 for the call - unless we can generate a uadd16.
-; CHECK-COMMON-LABEL: zext_load_sink_call:
-; CHECK-COMMON: uxt
-; CHECK-DSP-IMM: uadd16
-; CHECK-COMMON: cmp
-; CHECK-DSP: uxt
-; CHECK-DSP-IMM-NOT: uxt
-define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) {
-entry:
- %0 = load i16, i16* %ptr, align 4
- %1 = add i16 %exp, 3
- %cmp = icmp eq i16 %0, %exp
- br i1 %cmp, label %exit, label %if.then
-
-if.then:
- %conv0 = zext i16 %0 to i32
- %conv1 = zext i16 %1 to i32
- %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1)
- br label %exit
-
-exit:
- %exitval = phi i32 [ %call, %if.then ], [ 0, %entry ]
- ret i32 %exitval
-}
-
-
; Check that the pass doesn't try to promote the immediate parameters.
; CHECK-COMMON-LABEL: call_with_imms
; CHECK-COMMON-NOT: uxt
@@ -301,9 +232,10 @@ entry:
ret i32 undef
}
+; Transform will bail because of the zext
; Check that d.sroa.0.0.be is promoted passed directly into the tail call.
; CHECK-COMMON-LABEL: check_zext_phi_call_arg
-; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: uxt
define i32 @check_zext_phi_call_arg() {
entry:
br label %for.cond
@@ -385,7 +317,6 @@ declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroe
declare dso_local i32 @e(...) local_unnamed_addr #1
declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
-declare i32 @dummy(i32, i32)
declare i8 @dummy_i8(i8)
declare i8 @dummy2(i8*, i8, i8)
declare i16 @dummy3(i16)
diff --git a/llvm/test/CodeGen/ARM/arm-cgp-zext-truncs.ll b/llvm/test/CodeGen/ARM/arm-cgp-zext-truncs.ll
new file mode 100644
index 00000000000..a4a618e3c11
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/arm-cgp-zext-truncs.ll
@@ -0,0 +1,292 @@
+; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
+; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
+; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
+; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
+
+; Transform will fail because the trunc is not a sink.
+; CHECK-COMMON-LABEL: dsp_trunc
+; CHECK-COMMON: add [[ADD:[^ ]+]],
+; CHECK-DSP-NEXT: ldrh r1, [r3]
+; CHECK-DSP-NEXT: ldrh r2, [r2]
+; CHECK-DSP-NEXT: subs r1, r1, [[ADD]]
+; CHECK-DSP-NEXT: add r0, r2
+; CHECK-DSP-NEXT: uxth r3, r1
+; CHECK-DSP-NEXT: uxth r2, r0
+; CHECK-DSP-NEXT: cmp r2, r3
+
+; With DSP-IMM, we could have:
+; movs r1, #0
+; uxth r0, r0
+; usub16 r1, r1, r0
+; ldrh r0, [r2]
+; ldrh r3, [r3]
+; usub16 r0, r0, r1
+; uadd16 r1, r3, r1
+; cmp r0, r1
+define i16 @dsp_trunc(i32 %arg0, i32 %arg1, i16* %gep0, i16* %gep1) {
+entry:
+ %add0 = add i32 %arg0, %arg1
+ %conv0 = trunc i32 %add0 to i16
+ %sub0 = sub i16 0, %conv0
+ %load0 = load i16, i16* %gep0, align 2
+ %load1 = load i16, i16* %gep1, align 2
+ %sub1 = sub i16 %load0, %sub0
+ %add1 = add i16 %load1, %sub0
+ %cmp = icmp ult i16 %sub1, %add1
+ %res = select i1 %cmp, i16 %add1, i16 %sub1
+ ret i16 %res
+}
+
+; CHECK-COMMON-LABEL: trunc_i16_i8
+; CHECK-COMMON: ldrh
+; CHECK-COMMON: uxtb
+; CHECK-COMMON: cmp
+define i8 @trunc_i16_i8(i16* %ptr, i16 zeroext %arg0, i8 zeroext %arg1) {
+entry:
+ %0 = load i16, i16* %ptr
+ %1 = add i16 %0, %arg0
+ %2 = trunc i16 %1 to i8
+ %3 = icmp ugt i8 %2, %arg1
+ %4 = select i1 %3, i8 %2, i8 %arg1
+ ret i8 %4
+}
+
+; The pass will bail because of the zext, otherwise we'd want something like:
+; ldrb [[LD:r[^ ]+]], [r0]
+; subs [[SUB:r[^ ]+]], [[LD]], #1
+; cmp [[LD]], [[SUB]]
+; CHECK-COMMON-LABEL: icmp_i32_zext:
+; CHECK-COMMON: uxtb
+define i8 @icmp_i32_zext(i8* %ptr) {
+entry:
+ %gep = getelementptr inbounds i8, i8* %ptr, i32 0
+ %0 = load i8, i8* %gep, align 1
+ %1 = sub nuw nsw i8 %0, 1
+ %conv44 = zext i8 %0 to i32
+ br label %preheader
+
+preheader:
+ br label %body
+
+body:
+ %2 = phi i8 [ %1, %preheader ], [ %3, %if.end ]
+ %si.0274 = phi i32 [ %conv44, %preheader ], [ %inc, %if.end ]
+ %conv51266 = zext i8 %2 to i32
+ %cmp52267 = icmp eq i32 %si.0274, %conv51266
+ br i1 %cmp52267, label %if.end, label %exit
+
+if.end:
+ %inc = add i32 %si.0274, 1
+ %gep1 = getelementptr inbounds i8, i8* %ptr, i32 %inc
+ %3 = load i8, i8* %gep1, align 1
+ br label %body
+
+exit:
+ ret i8 %2
+}
+
+; Won't handle zext or sext
+; CHECK-COMMON-LABEL: icmp_sext_zext_store_i8_i16
+define i32 @icmp_sext_zext_store_i8_i16() {
+entry:
+ %0 = load i8, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @d_uch, i32 0, i32 2), align 1
+ %conv = zext i8 %0 to i16
+ store i16 %conv, i16* @sh1, align 2
+ %conv1 = zext i8 %0 to i32
+ %1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @d_sh, i32 0, i32 2), align 2
+ %conv2 = sext i16 %1 to i32
+ %cmp = icmp eq i32 %conv1, %conv2
+ %conv3 = zext i1 %cmp to i32
+ ret i32 %conv3
+}
+
+; Pass will bail because of the zext, otherwise:
+; ldrb [[LD:r[^ ]+]], [r1]
+; subs [[SUB:r[^ ]+]], #1
+; cmp [[SUB]], #3
+; CHECK-COMMON-LABEL: or_icmp_ugt:
+; CHECK-COMMON: uxt
+define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) {
+entry:
+ %0 = load i8, i8* %ptr
+ %1 = zext i8 %0 to i32
+ %mul = shl nuw nsw i32 %1, 1
+ %add0 = add nuw nsw i32 %mul, 6
+ %cmp0 = icmp ne i32 %arg, %add0
+ %add1 = add i8 %0, -1
+ %cmp1 = icmp ugt i8 %add1, 3
+ %or = or i1 %cmp0, %cmp1
+ ret i1 %or
+}
+
+; CHECK-COMMON-LABEL: icmp_switch_trunc:
+; CHECK-COMMON-NOT: uxt
+define i16 @icmp_switch_trunc(i16 zeroext %arg) {
+entry:
+ %conv = add nuw i16 %arg, 15
+ %mul = mul nuw nsw i16 %conv, 3
+ %trunc = trunc i16 %arg to i3
+ switch i3 %trunc, label %default [
+ i3 0, label %sw.bb
+ i3 1, label %sw.bb.i
+ ]
+
+sw.bb:
+ %cmp0 = icmp ult i16 %mul, 127
+ %select = select i1 %cmp0, i16 %mul, i16 127
+ br label %exit
+
+sw.bb.i:
+ %cmp1 = icmp ugt i16 %mul, 34
+ %select.i = select i1 %cmp1, i16 %mul, i16 34
+ br label %exit
+
+default:
+ br label %exit
+
+exit:
+ %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+ ret i16 %res
+}
+
+; Pass will bail because of the zext
+; CHECK-COMMON-LABEL: urem_trunc_icmps
+; CHECK-COMMON: uxt
+define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
+entry:
+ %ptr = load i16*, i16** %in, align 4
+ %ld = load i16, i16* %ptr, align 2
+ %cmp.i = icmp eq i16 %ld, 0
+ br i1 %cmp.i, label %exit, label %cond.false.i
+
+cond.false.i:
+ %rem = urem i16 5, %ld
+ %extract.t = trunc i16 %rem to i8
+ br label %body
+
+body:
+ %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
+ %cmp = icmp ugt i8 %cond.in.i.off0, 7
+ %conv5 = zext i1 %cmp to i32
+ store i32 %conv5, i32* %g, align 4
+ %.pr = load i32, i32* %k, align 4
+ %tobool13150 = icmp eq i32 %.pr, 0
+ br i1 %tobool13150, label %for.inc, label %exit
+
+for.inc:
+ %add = add nuw i8 %cond.in.i.off0, 1
+ br label %body
+
+exit:
+ ret void
+}
+
+; CHECK-COMMON-LABEL: phi_feeding_switch
+; CHECK-COMMON: ldrb
+; CHECK-COMMON: uxtb
+; CHECK-COMMON: uxtb
+define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) {
+entry:
+ %pre = load i8, i8* %memblock, align 1
+ %conv = trunc i16 %arg to i8
+ br label %header
+
+header:
+ %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
+ %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
+ %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
+ switch i8 %phi.0, label %default [
+ i8 43, label %for.inc.i
+ i8 45, label %for.inc.i.i
+ ]
+
+for.inc.i:
+ %xor = xor i8 %phi.1, 1
+ br label %latch
+
+for.inc.i.i:
+ %and = and i8 %phi.1, 3
+ br label %latch
+
+default:
+ %sub = sub i8 %phi.0, 1
+ %cmp2 = icmp ugt i8 %sub, 4
+ br i1 %cmp2, label %latch, label %exit
+
+latch:
+ %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
+ %count = add nuw i8 %phi.2, 1
+ store i8 %count, i8* %store, align 1
+ br label %header
+
+exit:
+ ret void
+}
+
+; Again, zexts will prevent the transform.
+; Check that %exp requires uxth in all cases, and will also be required to
+; promote %1 for the call - unless we can generate a uadd16.
+; CHECK-COMMON-LABEL: zext_load_sink_call:
+; CHECK-COMMON: uxt
+; uadd16
+; cmp
+; CHECK-COMMON: uxt
+define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) {
+entry:
+ %0 = load i16, i16* %ptr, align 4
+ %1 = add i16 %exp, 3
+ %cmp = icmp eq i16 %0, %exp
+ br i1 %cmp, label %exit, label %if.then
+
+if.then:
+ %conv0 = zext i16 %0 to i32
+ %conv1 = zext i16 %1 to i32
+ %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1)
+ br label %exit
+
+exit:
+ %exitval = phi i32 [ %call, %if.then ], [ 0, %entry ]
+ ret i32 %exitval
+}
+
+%class.ae = type { i8 }
+%class.x = type { i8 }
+%class.v = type { %class.q }
+%class.q = type { i16 }
+
+; CHECK-COMMON-LABEL: trunc_i16_i9_switch
+; CHECK-COMMON-NOT: uxt
+define i32 @trunc_i16_i9_switch(%class.ae* %this) {
+entry:
+ %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this)
+ %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call)
+ %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0
+ %1 = load i16, i16* %0, align 2
+ %2 = trunc i16 %1 to i9
+ %trunc = and i9 %2, -64
+ switch i9 %trunc, label %cleanup.fold.split [
+ i9 0, label %cleanup
+ i9 -256, label %if.then7
+ ]
+
+if.then7:
+ %3 = and i16 %1, 7
+ %tobool = icmp eq i16 %3, 0
+ %cond = select i1 %tobool, i32 2, i32 1
+ br label %cleanup
+
+cleanup.fold.split:
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ]
+ ret i32 %retval.0
+}
+
+declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr
+declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr
+declare i32 @dummy(i32, i32)
+
+@d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1
+@sh1 = hidden local_unnamed_addr global i16 0, align 2
+@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2
OpenPOWER on IntegriCloud