author     Sam Parker <sam.parker@arm.com>   2018-08-10 13:57:13 +0000
committer  Sam Parker <sam.parker@arm.com>   2018-08-10 13:57:13 +0000
commit     8c4b964c5ac430455c1c817831056c3ace90d92c (patch)
tree       c76153ff41f1f0e726cc889613a5c93cd8440b56 /llvm/lib
parent     53ac1776f3be82a97f94591cc08c1864f5e24f4b (diff)
[ARM] Disallow zexts in ARMCodeGenPrepare
Enabling ARMCodeGenPrepare by default caused a whole load of failures, due to zexts and truncs not being handled properly. ZExts are messy, so it's easier to just disable them for now, and truncs are allowed only as 'sinks'. I still need to figure out why allowing truncs as 'sources' causes so many failures. The other main change is that we are now explicit about the type we are converting to: it is always 'TypeSize'. Type support is also now checked while checking for valid opcodes, as having the checks at different stages was unnecessarily complicated. I've also moved the tests around, so the zext and trunc tests are in their own file, as are the overflowing opcode tests.

Differential Revision: https://reviews.llvm.org/D50518

llvm-svn: 339432
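A condensed sketch of the source/sink split described above is shown below. It is simplified from the patch: the names isSourceSketch/isSinkSketch and the explicit TypeSize parameter are illustrative only, while the in-tree functions are the static isSource/isSink helpers in the diff, which read ARMCodeGenPrepare::TypeSize directly.

// Condensed sketch of the source/sink classification this patch installs
// (simplified from ARMCodeGenPrepare.cpp; not the exact pass code).
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sources: leaves that already produce a narrow value with known-clear
// upper bits - arguments, loads, and calls with a zeroext return value.
// ZExts and truncs are deliberately no longer accepted as sources.
static bool isSourceSketch(Value *V) {
  if (isa<Argument>(V) || isa<LoadInst>(V))
    return true;
  if (auto *Call = dyn_cast<CallInst>(V))
    return Call->hasRetAttr(Attribute::AttrKind::ZExt);
  return false;
}

// Sinks: users whose operand types must keep the original width. A trunc
// of a TypeSize-wide value is now accepted here, i.e. only as a sink.
static bool isSinkSketch(Value *V, unsigned TypeSize) {
  auto UsesNarrowValue = [TypeSize](Value *Op) {
    return Op->getType()->getScalarSizeInBits() == TypeSize;
  };
  if (auto *Store = dyn_cast<StoreInst>(V))
    return UsesNarrowValue(Store->getValueOperand());
  if (auto *Return = dyn_cast<ReturnInst>(V))
    return UsesNarrowValue(Return->getReturnValue());
  if (auto *Trunc = dyn_cast<TruncInst>(V))
    return UsesNarrowValue(Trunc->getOperand(0));
  return isa<CallInst>(V);
}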
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp  274
1 file changed, 109 insertions, 165 deletions
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427..d0ff4441371 100644
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -85,16 +85,15 @@ class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
- bool isNarrowInstSupported(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -126,65 +125,66 @@ static bool isSigned(Value *V) {
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n");
+ Type *Ty = V->getType();
+ if (Ty->isVoidTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
+ if (!IntTy) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, not an integer.\n");
return false;
+ }
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a leaf in the use-def chain, producing
+/// a narrow (i8, i16) value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ // TODO Allow truncs and zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// the IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
+ // TODO The truncate also isn't actually necessary because we would already
+ // proved that the data value is kept within the range of the original data
+ // type.
auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
};
if (auto *Store = dyn_cast<StoreInst>(V))
return UsesNarrowValue(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
return UsesNarrowValue(Return->getReturnValue());
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return UsesNarrowValue(Trunc->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
/// it's operands or result.
static bool isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
@@ -222,19 +222,18 @@ static bool isSafeOverflow(Instruction *I) {
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -262,7 +261,7 @@ static bool isPromotedResultSafe(Value *V) {
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,10 +269,10 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
@@ -285,10 +284,9 @@ void IRPromoter::Mutate(Type *OrigTy,
SmallPtrSetImpl<Instruction*> &Roots) {
IRBuilder<> Builder{Ctx};
Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
@@ -325,7 +323,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
<< *I << "\n");
Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *Args[] = { I->getOperand(0), I->getOperand(1) };
@@ -353,9 +351,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
for (auto V : Leaves) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
@@ -401,17 +397,9 @@ void IRPromoter::Mutate(Type *OrigTy,
for (auto *V : Visited) {
if (Leaves.count(V))
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (!isa<Instruction>(V))
continue;
- }
if (!shouldPromote(V) || isPromotedResultSafe(V))
continue;
@@ -459,30 +447,6 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
-
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
-
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
-
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
- }
- }
- return true;
-}
-
/// We accept most instructions, as well as Arguments and ConstantInsts. We
/// Disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
@@ -490,42 +454,42 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isEquality() || !ICmp->isSigned();
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if (isa<ConstantInt>(V) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ // Currently, Trunc is the only cast we support.
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return isSupportedType(Trunc->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
// still be roots.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V)) {
LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
return false;
}
+ if (!isSupportedType(V))
+ return false;
bool res = !isSigned(V);
if (!res)
@@ -537,39 +501,49 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
+ if (isPromotedResultSafe(V))
+ return true;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
- }
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
- if (VSize > TypeSize)
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
return false;
- if (isPromotedResultSafe(V))
- return true;
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16)
+ return false;
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Leaves;
@@ -584,6 +558,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore pointer value that aren't instructions.
+ if (!isa<Instruction>(V) && isa<PointerType>(V->getType()))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -638,41 +616,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
- for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
- continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
- continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
- }
-
LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
);
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
- return false;
Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
return true;
@@ -712,12 +659,8 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
@@ -744,6 +687,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();