author     Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>   2016-09-28 20:05:39 +0000
committer  Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>   2016-09-28 20:05:39 +0000
commit     e14df4b2365948f67069b9ec378852baf6c9da88 (patch)
tree       c2d2a90aeaea4a0799973ac14a8857bb1b72ea00 /llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
parent     b02043cd0f3287cf1c7d86995aae1c7941e993af (diff)
[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions
Differential Revision: https://reviews.llvm.org/D24125

llvm-svn: 282624
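
As a sketch of the transformation in LLVM IR (value names here are hypothetical): on a target with 16 bit instructions, a uniform, unsigned 16 bit operation such as

    %r = add i16 %a, %b

is rewritten along these lines:

    %ext0 = zext i16 %a to i32
    %ext1 = zext i16 %b to i32
    %res  = add i32 %ext0, %ext1
    %r    = trunc i32 %res to i16

Only sdiv and srem are treated as signed (and hence sign extended); sdiv and udiv themselves are deliberately left unpromoted.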
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 237
1 file changed, 234 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b955e231699..6304098639c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -39,6 +39,61 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
Module *Mod;
bool HasUnsafeFPMath;
+ /// \brief Copies exact/nsw/nuw flags (if any) from binary operator \p I to
+ /// binary operator \p V.
+ ///
+ /// \returns Binary operator \p V.
+ Value *copyFlags(const BinaryOperator &I, Value *V) const;
+
+ /// \returns Equivalent 16 bit integer type for the given 32 bit integer type
+ /// \p T.
+ Type *getI16Ty(IRBuilder<> &B, const Type *T) const;
+
+ /// \returns Equivalent 32 bit integer type for the given 16 bit integer type
+ /// \p T.
+ Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
+
+ /// \returns True if the base element of type \p T is a 16 bit integer, false
+ /// otherwise.
+ bool isI16Ty(const Type *T) const;
+
+ /// \returns True if the base element of type \p T is a 32 bit integer, false
+ /// otherwise.
+ bool isI32Ty(const Type *T) const;
+
+ /// \returns True if binary operation \p I is a signed binary operation, false
+ /// otherwise.
+ bool isSigned(const BinaryOperator &I) const;
+
+ /// \returns True if the condition of 'select' operation \p I comes from a
+ /// signed 'icmp' operation, false otherwise.
+ bool isSigned(const SelectInst &I) const;
+
+ /// \brief Promotes uniform 16 bit binary operation \p I to an equivalent 32
+ /// bit binary operation by sign or zero extending operands to 32 bits,
+ /// replacing the 16 bit operation with the equivalent 32 bit operation, and
+ /// truncating the result of the 32 bit operation back to 16 bits. 16 bit
+ /// division operations are not promoted.
+ ///
+ /// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
+ /// binary operation, false otherwise.
+ bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;
+
+ /// \brief Promotes uniform 16 bit 'icmp' operation \p I to a 32 bit 'icmp'
+ /// operation by sign or zero extending operands to 32 bits, and replacing the
+ /// 16 bit operation with the 32 bit operation.
+ ///
+ /// \returns True.
+ bool promoteUniformI16OpToI32Op(ICmpInst &I) const;
+
+ /// \brief Promotes uniform 16 bit 'select' operation \p I to a 32 bit
+ /// 'select' operation by sign or zero extending operands to 32 bits,
+ /// replacing the 16 bit operation with the 32 bit operation, and truncating
+ /// the result of the 32 bit operation back to 16 bits.
+ ///
+ /// \returns True.
+ bool promoteUniformI16OpToI32Op(SelectInst &I) const;
+
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
@@ -51,9 +106,10 @@ public:
bool visitFDiv(BinaryOperator &I);
- bool visitInstruction(Instruction &I) {
- return false;
- }
+ bool visitInstruction(Instruction &I) { return false; }
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitICmpInst(ICmpInst &I);
+ bool visitSelectInst(SelectInst &I);
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
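
These visit* overloads are reached through LLVM's InstVisitor dispatch. A minimal sketch of the driving loop, assuming the pass derives from InstVisitor<AMDGPUCodeGenPrepare, bool>; note the iterator must be advanced before visiting, because the promote* routines erase the instruction they replace:

    bool MadeChange = false;
    for (BasicBlock &BB : F) {
      BasicBlock::iterator Next;
      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
        Next = std::next(I);       // grab the successor first; visit() may erase *I
        MadeChange |= visit(*I);   // dispatches to visitBinaryOperator, visitICmpInst, ...
      }
    }
    return MadeChange;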
@@ -70,6 +126,150 @@ public:
} // End anonymous namespace
+Value *AMDGPUCodeGenPrepare::copyFlags(
+ const BinaryOperator &I, Value *V) const {
+ assert(isa<BinaryOperator>(V) && "V must be binary operator");
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(V);
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
+ BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ } else if (isa<PossiblyExactOperator>(BinOp)) {
+ BinOp->setIsExact(I.isExact());
+ }
+
+ return V;
+}
+
+Type *AMDGPUCodeGenPrepare::getI16Ty(IRBuilder<> &B, const Type *T) const {
+ assert(isI32Ty(T) && "T must be 32 bits");
+
+ if (T->isIntegerTy())
+ return B.getInt16Ty();
+ return VectorType::get(B.getInt16Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
+ assert(isI16Ty(T) && "T must be 16 bits");
+
+ if (T->isIntegerTy())
+ return B.getInt32Ty();
+ return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+bool AMDGPUCodeGenPrepare::isI16Ty(const Type *T) const {
+ if (T->isIntegerTy(16))
+ return true;
+ if (!T->isVectorTy())
+ return false;
+ return cast<VectorType>(T)->getElementType()->isIntegerTy(16);
+}
+
+bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
+ if (T->isIntegerTy(32))
+ return true;
+ if (!T->isVectorTy())
+ return false;
+ return cast<VectorType>(T)->getElementType()->isIntegerTy(32);
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
+ return I.getOpcode() == Instruction::SDiv ||
+ I.getOpcode() == Instruction::SRem;
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
+ return isa<ICmpInst>(I.getOperand(0)) ?
+ cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
+ assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+
+ if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ }
+ ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+ TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const {
+ assert(isI16Ty(I.getOperand(0)->getType()) && "Op0 must be 16 bits");
+ assert(isI16Ty(I.getOperand(1)->getType()) && "Op1 must be 16 bits");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32TyOp0 = getI32Ty(Builder, I.getOperand(0)->getType());
+ Type *I32TyOp1 = getI32Ty(Builder, I.getOperand(1)->getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *NewICmp = nullptr;
+
+ if (I.isSigned()) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32TyOp0);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32TyOp1);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32TyOp0);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32TyOp1);
+ }
+ NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
+
+ I.replaceAllUsesWith(NewICmp);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const {
+ assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp1 = nullptr;
+ Value *ExtOp2 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
+ } else {
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
+ }
+ ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
+ TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
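
To make the signed path and copyFlags() concrete, a sketch not taken from the patch itself: srem is the only promoted opcode that isSigned() reports as signed, so

    %r = srem i16 %a, %b

becomes

    %ext0 = sext i16 %a to i32
    %ext1 = sext i16 %b to i32
    %res  = srem i32 %ext0, %ext1
    %r    = trunc i32 %res to i16

while an 'add nsw i16' widens to an 'add nsw i32', since copyFlags() carries nsw/nuw over to the new OverflowingBinaryOperator (and 'exact' over to a PossiblyExactOperator such as ashr).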
@@ -154,6 +354,37 @@ static bool hasUnsafeFPMath(const Function &F) {
return Attr.getValueAsString() == "true";
}
+bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getOperand(0)->getType()) &&
+ isI16Ty(I.getOperand(1)->getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
+ bool Changed = false;
+
+ // TODO: Should we promote smaller types that will be legalized to i16?
+ if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
+ Changed |= promoteUniformI16OpToI32Op(I);
+
+ return Changed;
+}
+
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;
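
A sketch of the compare and select paths in IR (hypothetical value names): a signed compare

    %c = icmp slt i16 %a, %b

becomes

    %ext0 = sext i16 %a to i32
    %ext1 = sext i16 %b to i32
    %c    = icmp slt i32 %ext0, %ext1

with no truncation needed, since the result stays i1; that is why promoteUniformI16OpToI32Op(ICmpInst &) always returns true. A select is widened the same way on operands 1 and 2 (the i1 condition is untouched), choosing sext or zext by whether the condition comes from a signed 'icmp', and its i32 result is truncated back to i16.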