summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-13 23:47:36 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-13 23:47:36 +0000
commit: 68a2fef9ae5beadd0ca7974d936e98caa04aa085 (patch)
tree: ea66f916f5ab7b128571ba39eb8227afc23e83b8 /llvm/lib
parent: 347ec0faa79a26dd1ba2e896b9acb18d8d05fdfc (diff)
download: bcm5719-llvm-68a2fef9ae5beadd0ca7974d936e98caa04aa085.tar.gz
bcm5719-llvm-68a2fef9ae5beadd0ca7974d936e98caa04aa085.zip
[AMDGPU] gfx1010 wave32 icmp/fcmp intrinsic changes for wave32
Differential Revision: https://reviews.llvm.org/D63301
llvm-svn: 363339
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 44
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 7
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4
6 files changed, 69 insertions, 25 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 4756a77a7ec..1c503c29d55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -249,7 +249,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// We need to know how many lanes are active within the wavefront, and we do
// this by doing a ballot of active lanes.
CallInst *const Ballot =
- B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()},
+ B.CreateIntrinsic(Intrinsic::amdgcn_icmp,
+ {B.getInt64Ty(), B.getInt32Ty()},
{B.getInt32(1), B.getInt32(0), B.getInt32(33)});
// We need to know how many lanes are active within the wavefront that are
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index bccbcd4da16..ca8dc8c07c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1028,6 +1028,10 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+ bool isWave32() const {
+ return WavefrontSize == 32;
+ }
+
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 15ab80b756e..b764ca7d706 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -55,13 +57,13 @@ class SIAnnotateControlFlow : public FunctionPass {
Type *Boolean;
Type *Void;
- Type *Int64;
+ Type *IntMask;
Type *ReturnStruct;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
UndefValue *BoolUndef;
- Constant *Int64Zero;
+ Constant *IntMaskZero;
Function *If;
Function *Else;
@@ -74,6 +76,8 @@ class SIAnnotateControlFlow : public FunctionPass {
LoopInfo *LI;
+ void initialize(Module &M, const GCNSubtarget &ST);
+
bool isUniform(BranchInst *T);
bool isTopOfStack(BasicBlock *BB);
@@ -103,8 +107,6 @@ public:
SIAnnotateControlFlow() : FunctionPass(ID) {}
- bool doInitialization(Module &M) override;
-
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "SI annotate control flow"; }
@@ -114,6 +116,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LegacyDivergenceAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
FunctionPass::getAnalysisUsage(AU);
}
};
@@ -124,31 +127,34 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
char SIAnnotateControlFlow::ID = 0;
/// Initialize all the types and constants used in the pass
-bool SIAnnotateControlFlow::doInitialization(Module &M) {
+void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
LLVMContext &Context = M.getContext();
Void = Type::getVoidTy(Context);
Boolean = Type::getInt1Ty(Context);
- Int64 = Type::getInt64Ty(Context);
- ReturnStruct = StructType::get(Boolean, Int64);
+ IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
+ : Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, IntMask);
BoolTrue = ConstantInt::getTrue(Context);
BoolFalse = ConstantInt::getFalse(Context);
BoolUndef = UndefValue::get(Boolean);
- Int64Zero = ConstantInt::get(Int64, 0);
-
- If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
- Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
- IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
- Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
- EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
- return false;
+ IntMaskZero = ConstantInt::get(IntMask, 0);
+
+ If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
+ Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
+ { IntMask, IntMask });
+ IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
+ { IntMask, IntMask });
+ Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
+ EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
}
/// Is the branch condition uniform or did the StructurizeCFG pass
@@ -258,14 +264,14 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
return;
BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+ PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
for (BasicBlock *Pred : predecessors(Target)) {
- Value *PHIValue = Int64Zero;
+ Value *PHIValue = IntMaskZero;
if (Pred == BB) // Remember the value of the previous iteration.
PHIValue = Arg;
// If the backedge from Pred to Target could be executed before the exit
@@ -316,6 +322,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+
+ initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3bb1ddc6703..92ca105af25 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3839,7 +3839,6 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
-
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -3855,8 +3854,14 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
- DAG.getCondCode(CCOpcode));
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+ if (VT.bitsEq(CCVT))
+ return SetCC;
+ return DAG.getZExtOrTrunc(SetCC, DL, VT);
}
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
@@ -3882,8 +3887,13 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
- Src1, DAG.getCondCode(CCOpcode));
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+ if (VT.bitsEq(CCVT))
+ return SetCC;
+ return DAG.getZExtOrTrunc(SetCC, SL, VT);
}
void SITargetLowering::ReplaceNodeResults(SDNode *N,
@@ -5394,6 +5404,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
+ case Intrinsic::amdgcn_wavefrontsize:
+ return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
+ SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
@@ -5598,6 +5611,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_fmad_ftz:
return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::amdgcn_if_break:
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
+ Op->getOperand(1), Op->getOperand(2)), 0);
+
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -6495,6 +6513,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
M->getMemoryVT(), M->getMemOperand());
}
+ case Intrinsic::amdgcn_end_cf:
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
+ Op->getOperand(2), Chain), 0);
+
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4a3e8b3e36b..e6b64ecbfce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -604,7 +604,12 @@ def : Pat <
// TODO: we could add more variants for other types of conditionals
def : Pat <
- (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
+
+def : Pat <
+ (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
(COPY $src) // Return the SGPRs representing i1 src
>;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 2c8fa20b259..f167762b602 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3733,7 +3733,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
Function *NewF =
- Intrinsic::getDeclaration(II->getModule(), NewIID, SrcLHS->getType());
+ Intrinsic::getDeclaration(II->getModule(), NewIID,
+ { II->getType(),
+ SrcLHS->getType() });
Value *Args[] = { SrcLHS, SrcRHS,
ConstantInt::get(CC->getType(), SrcPred) };
CallInst *NewCall = Builder.CreateCall(NewF, Args);
OpenPOWER on IntegriCloud