author     Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>   2016-09-28 20:05:39 +0000
committer  Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>   2016-09-28 20:05:39 +0000
commit     e14df4b2365948f67069b9ec378852baf6c9da88 (patch)
tree       c2d2a90aeaea4a0799973ac14a8857bb1b72ea00 /llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
parent     b02043cd0f3287cf1c7d86995aae1c7941e993af (diff)
[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions
Differential Revision: https://reviews.llvm.org/D24125

llvm-svn: 282624
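
As a sketch of the transformation in LLVM IR (value names here are hypothetical): on a target with 16 bit instructions, a uniform, unsigned 16 bit operation such as

    %r = add i16 %a, %b

is rewritten along these lines:

    %ext0 = zext i16 %a to i32
    %ext1 = zext i16 %b to i32
    %res  = add i32 %ext0, %ext1
    %r    = trunc i32 %res to i16

Only sdiv and srem are treated as signed (and hence sign extended); sdiv and udiv themselves are deliberately left unpromoted.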
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 237
1 file changed, 234 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b955e231699..6304098639c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -39,6 +39,61 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
Module *Mod;
bool HasUnsafeFPMath;
+ /// \brief Copies exact/nsw/nuw flags (if any) from binary operator \p I to
+ /// binary operator \p V.
+ ///
+ /// \returns Binary operator \p V.
+ Value *copyFlags(const BinaryOperator &I, Value *V) const;
+
+ /// \returns Equivalent 16 bit integer type for the given 32 bit integer type
+ /// \p T.
+ Type *getI16Ty(IRBuilder<> &B, const Type *T) const;
+
+ /// \returns Equivalent 32 bit integer type for the given 16 bit integer type
+ /// \p T.
+ Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
+
+ /// \returns True if the base element of type \p T is a 16 bit integer, false
+ /// otherwise.
+ bool isI16Ty(const Type *T) const;
+
+ /// \returns True if the base element of type \p T is a 32 bit integer, false
+ /// otherwise.
+ bool isI32Ty(const Type *T) const;
+
+ /// \returns True if binary operation \p I is a signed binary operation, false
+ /// otherwise.
+ bool isSigned(const BinaryOperator &I) const;
+
+ /// \returns True if the condition of 'select' operation \p I comes from a
+ /// signed 'icmp' operation, false otherwise.
+ bool isSigned(const SelectInst &I) const;
+
+ /// \brief Promotes uniform 16 bit binary operation \p I to an equivalent 32
+ /// bit binary operation by sign or zero extending operands to 32 bits,
+ /// replacing the 16 bit operation with the equivalent 32 bit operation, and
+ /// truncating the result of the 32 bit operation back to 16 bits. 16 bit
+ /// division operations are not promoted.
+ ///
+ /// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
+ /// binary operation, false otherwise.
+ bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;
+
+ /// \brief Promotes uniform 16 bit 'icmp' operation \p I to a 32 bit 'icmp'
+ /// operation by sign or zero extending operands to 32 bits, and replacing the
+ /// 16 bit operation with the 32 bit operation.
+ ///
+ /// \returns True.
+ bool promoteUniformI16OpToI32Op(ICmpInst &I) const;
+
+ /// \brief Promotes uniform 16 bit 'select' operation \p I to a 32 bit
+ /// 'select' operation by sign or zero extending operands to 32 bits,
+ /// replacing the 16 bit operation with the 32 bit operation, and truncating
+ /// the result of the 32 bit operation back to 16 bits.
+ ///
+ /// \returns True.
+ bool promoteUniformI16OpToI32Op(SelectInst &I) const;
+
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
@@ -51,9 +106,10 @@ public:
bool visitFDiv(BinaryOperator &I);
- bool visitInstruction(Instruction &I) {
- return false;
- }
+ bool visitInstruction(Instruction &I) { return false; }
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitICmpInst(ICmpInst &I);
+ bool visitSelectInst(SelectInst &I);
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
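
These visit* overloads are reached through LLVM's InstVisitor dispatch. A minimal sketch of the driving loop, assuming the pass derives from InstVisitor<AMDGPUCodeGenPrepare, bool>; note the iterator must be advanced before visiting, because the promote* routines erase the instruction they replace:

    bool MadeChange = false;
    for (BasicBlock &BB : F) {
      BasicBlock::iterator Next;
      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
        Next = std::next(I);       // grab the successor first; visit() may erase *I
        MadeChange |= visit(*I);   // dispatches to visitBinaryOperator, visitICmpInst, ...
      }
    }
    return MadeChange;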
@@ -70,6 +126,150 @@ public:
} // End anonymous namespace
+Value *AMDGPUCodeGenPrepare::copyFlags(
+ const BinaryOperator &I, Value *V) const {
+ assert(isa<BinaryOperator>(V) && "V must be binary operator");
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(V);
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
+ BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ } else if (isa<PossiblyExactOperator>(BinOp)) {
+ BinOp->setIsExact(I.isExact());
+ }
+
+ return V;
+}
+
+Type *AMDGPUCodeGenPrepare::getI16Ty(IRBuilder<> &B, const Type *T) const {
+ assert(isI32Ty(T) && "T must be 32 bits");
+
+ if (T->isIntegerTy())
+ return B.getInt16Ty();
+ return VectorType::get(B.getInt16Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
+ assert(isI16Ty(T) && "T must be 16 bits");
+
+ if (T->isIntegerTy())
+ return B.getInt32Ty();
+ return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+bool AMDGPUCodeGenPrepare::isI16Ty(const Type *T) const {
+ if (T->isIntegerTy(16))
+ return true;
+ if (!T->isVectorTy())
+ return false;
+ return cast<VectorType>(T)->getElementType()->isIntegerTy(16);
+}
+
+bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
+ if (T->isIntegerTy(32))
+ return true;
+ if (!T->isVectorTy())
+ return false;
+ return cast<VectorType>(T)->getElementType()->isIntegerTy(32);
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
+ return I.getOpcode() == Instruction::SDiv ||
+ I.getOpcode() == Instruction::SRem;
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
+ return isa<ICmpInst>(I.getOperand(0)) ?
+ cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
+ assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+
+ if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ }
+ ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+ TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const {
+ assert(isI16Ty(I.getOperand(0)->getType()) && "Op0 must be 16 bits");
+ assert(isI16Ty(I.getOperand(1)->getType()) && "Op1 must be 16 bits");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32TyOp0 = getI32Ty(Builder, I.getOperand(0)->getType());
+ Type *I32TyOp1 = getI32Ty(Builder, I.getOperand(1)->getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *NewICmp = nullptr;
+
+ if (I.isSigned()) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32TyOp0);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32TyOp1);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32TyOp0);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32TyOp1);
+ }
+ NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
+
+ I.replaceAllUsesWith(NewICmp);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const {
+ assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp1 = nullptr;
+ Value *ExtOp2 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
+ } else {
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
+ }
+ ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
+ TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
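
To make the signed path and copyFlags() concrete, a sketch not taken from the patch itself: srem is the only promoted opcode that isSigned() reports as signed, so

    %r = srem i16 %a, %b

becomes

    %ext0 = sext i16 %a to i32
    %ext1 = sext i16 %b to i32
    %res  = srem i32 %ext0, %ext1
    %r    = trunc i32 %res to i16

while an 'add nsw i16' widens to an 'add nsw i32', since copyFlags() carries nsw/nuw over to the new OverflowingBinaryOperator (and 'exact' over to a PossiblyExactOperator such as ashr).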
@@ -154,6 +354,37 @@ static bool hasUnsafeFPMath(const Function &F) {
return Attr.getValueAsString() == "true";
}
+bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getOperand(0)->getType()) &&
+ isI16Ty(I.getOperand(1)->getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;
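
A sketch of the compare and select paths in IR (hypothetical value names): a signed compare

    %c = icmp slt i16 %a, %b

becomes

    %ext0 = sext i16 %a to i32
    %ext1 = sext i16 %b to i32
    %c    = icmp slt i32 %ext0, %ext1

with no truncation needed, since the result stays i1; that is why promoteUniformI16OpToI32Op(ICmpInst &) always returns true. A select is widened the same way on operands 1 and 2 (the i1 condition is untouched), choosing sext or zext by whether the condition comes from a signed 'icmp', and its i32 result is truncated back to i16.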