summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-17 17:57:50 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-17 17:57:50 +0000
commit: a9191c8492abe90dfb4141610d11b0d5d0d67f5d (patch)
tree: 07e3e16ebf2e456552d34783497e00f714307ed0 /llvm/lib
parent: 6d741f29ec8f41d949fb08578488abdfd1685f9b (diff)
download: bcm5719-llvm-a9191c8492abe90dfb4141610d11b0d5d0d67f5d.tar.gz
          bcm5719-llvm-a9191c8492abe90dfb4141610d11b0d5d0d67f5d.zip
[AMDGPU] gfx1010 wavefrontsize intrinsic folding
Differential Revision: https://reviews.llvm.org/D63206
llvm-svn: 363588
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 70
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2
3 files changed, 59 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 94dad0d7470..19a7b38004c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -53,7 +53,8 @@ FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &,
+ const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 581e229b4a0..7156824638a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/ADT/StringSet.h"
@@ -22,6 +23,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -29,6 +31,7 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <vector>
#include <cmath>
@@ -65,6 +68,8 @@ private:
typedef llvm::AMDGPULibFunc FuncInfo;
+ const TargetMachine *TM;
+
// -fuse-native.
bool AllNative = false;
@@ -134,6 +139,9 @@ private:
// __read_pipe/__write_pipe
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+ // llvm.amdgcn.wavefrontsize
+ bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
+
// Get insertion point at entry.
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
@@ -152,6 +160,8 @@ protected:
}
public:
+ AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+
bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
void initNativeFuncs();
@@ -166,15 +176,16 @@ namespace {
class AMDGPUSimplifyLibCalls : public FunctionPass {
- AMDGPULibCalls Simplifier;
-
const TargetOptions Options;
+ AMDGPULibCalls Simplifier;
+
public:
static char ID; // Pass identification
- AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
- : FunctionPass(ID), Options(Opt) {
+ AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
+ const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), Options(Opt), Simplifier(TM) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -639,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
// Ignore indirect calls.
if (Callee == 0) return false;
- FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo))
- return false;
-
- // Further check the number of arguments to see if they match.
- if (CI->getNumArgOperands() != FInfo.getNumArgs())
- return false;
-
BasicBlock *BB = CI->getParent();
LLVMContext &Context = CI->getParent()->getContext();
IRBuilder<> B(Context);
@@ -658,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
B.setFastMathFlags(FPOp->getFastMathFlags());
+ switch (Callee->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::amdgcn_wavefrontsize:
+ return !EnablePreLink && fold_wavefrontsize(CI, B);
+ }
+
+ FuncInfo FInfo;
+ if (!parseFunctionName(Callee->getName(), &FInfo))
+ return false;
+
+ // Further check the number of arguments to see if they match.
+ if (CI->getNumArgOperands() != FInfo.getNumArgs())
+ return false;
+
if (TDOFold(CI, FInfo))
return true;
@@ -1371,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
return true;
}
+bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
+ if (!TM)
+ return false;
+
+ StringRef CPU = TM->getTargetCPU();
+ StringRef Features = TM->getTargetFeatureString();
+ if ((CPU.empty() || CPU.equals_lower("generic")) &&
+ (Features.empty() ||
+ Features.find_lower("wavefrontsize") == StringRef::npos))
+ return false;
+
+ Function *F = CI->getParent()->getParent();
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
+ unsigned N = ST.getWavefrontSize();
+
+ LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
+ << N << "\n");
+
+ CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
+ CI->eraseFromParent();
+ return true;
+}
+
// Get insertion point at entry.
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
Function * Func = UI->getParent()->getParent();
@@ -1680,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
}
// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
- return new AMDGPUSimplifyLibCalls(Opt);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
+ const TargetMachine *TM) {
+ return new AMDGPUSimplifyLibCalls(Opt, TM);
}
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 41a075756ee..ae422940b25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -432,7 +432,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
- PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
+ PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt, this));
});
Builder.addExtension(
OpenPOWER on IntegriCloud