summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp92
1 files changed, 83 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index ff89ce7b96f..d3d5c6dc9d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -131,6 +131,9 @@ private:
// sin/cos
bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
+ // __read_pipe/__write_pipe
+ bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+
// Get insertion point at entry.
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
@@ -458,11 +461,11 @@ static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
}
static inline int getVecSize(const AMDGPULibFunc& FInfo) {
- return FInfo.Leads[0].VectorSize;
+ return FInfo.getLeads()[0].VectorSize;
}
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
- return (AMDGPULibFunc::EType)FInfo.Leads[0].ArgType;
+ return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
}
Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
@@ -507,8 +510,8 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
Value *opr0 = aCI->getArgOperand(0);
AMDGPULibFunc nf;
- nf.Leads[0].ArgType = FInfo.Leads[0].ArgType;
- nf.Leads[0].VectorSize = FInfo.Leads[0].VectorSize;
+ nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
+ nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
nf.setPrefix(AMDGPULibFunc::NATIVE);
nf.setId(AMDGPULibFunc::EI_SIN);
@@ -537,11 +540,10 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
Function *Callee = aCI->getCalledFunction();
FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo) ||
+ if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
- getArgType(FInfo) == AMDGPULibFunc::F64 ||
- !HasNative(FInfo.getId()) ||
- !(AllNative || useNativeFunc(FInfo.getName())) ) {
+ getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
+ !(AllNative || useNativeFunc(FInfo.getName()))) {
return false;
}
@@ -559,6 +561,73 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
return true;
}
+// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
+// builtin, with appended type size and alignment arguments, where 2 or 4
+// indicates the original number of arguments. The library has optimized version
+// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
+// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
+// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
+// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
+bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
+ FuncInfo &FInfo) {
+ auto *Callee = CI->getCalledFunction();
+ if (!Callee->isDeclaration())
+ return false;
+
+ assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
+ auto *M = Callee->getParent();
+ auto &Ctx = M->getContext();
+ std::string Name = Callee->getName();
+ auto NumArg = CI->getNumArgOperands();
+ if (NumArg != 4 && NumArg != 6)
+ return false;
+ auto *PacketSize = CI->getArgOperand(NumArg - 2);
+ auto *PacketAlign = CI->getArgOperand(NumArg - 1);
+ if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
+ return false;
+ unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
+ unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
+ if (Size != Align || !isPowerOf2_32(Size))
+ return false;
+
+ Type *PtrElemTy;
+ if (Size <= 8)
+ PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
+ else
+ PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
+ unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
+ auto PtrArg = CI->getArgOperand(PtrArgLoc);
+ unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
+ auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
+
+ SmallVector<llvm::Type *, 6> ArgTys;
+ for (unsigned I = 0; I != PtrArgLoc; ++I)
+ ArgTys.push_back(CI->getArgOperand(I)->getType());
+ ArgTys.push_back(PtrTy);
+
+ Name = Name + "_" + std::to_string(Size);
+ auto *FTy = FunctionType::get(Callee->getReturnType(),
+ ArrayRef<Type *>(ArgTys), false);
+ AMDGPULibFunc NewLibFunc(Name, FTy);
+ auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
+ if (!F)
+ return false;
+
+ auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
+ SmallVector<Value *, 6> Args;
+ for (unsigned I = 0; I != PtrArgLoc; ++I)
+ Args.push_back(CI->getArgOperand(I));
+ Args.push_back(BCast);
+
+ auto *NCI = B.CreateCall(F, Args);
+ NCI->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(NCI);
+ CI->dropAllReferences();
+ CI->eraseFromParent();
+
+ return true;
+}
+
// This function returns false if no change; return true otherwise.
bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
this->CI = CI;
@@ -636,6 +705,11 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
return fold_sincos(CI, B, AA);
break;
+ case AMDGPULibFunc::EI_READ_PIPE_2:
+ case AMDGPULibFunc::EI_READ_PIPE_4:
+ case AMDGPULibFunc::EI_WRITE_PIPE_2:
+ case AMDGPULibFunc::EI_WRITE_PIPE_4:
+ return fold_read_write_pipe(CI, B, FInfo);
default:
break;
@@ -1259,7 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- nf.Leads[0].PtrKind = AMDGPULibFunc::GENERIC;
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC;
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
OpenPOWER on IntegriCloud