diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-09 22:00:42 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-09 22:00:42 +0000 |
commit | 0699ef39ce8498ea6ee625babfefabd3f648776a (patch) | |
tree | a5f44cc462c597ae5d70a8b33207b5e098e96033 /llvm/lib/Target/AMDGPU | |
parent | d7dd65ad7c0eb8ba0345154af23b10f2c4be4e8e (diff) | |
download | bcm5719-llvm-0699ef39ce8498ea6ee625babfefabd3f648776a.tar.gz bcm5719-llvm-0699ef39ce8498ea6ee625babfefabd3f648776a.zip |
AMDGPU: Add pass to expand memcpy/memmove/memset
llvm-svn: 294635
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 123 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 |
5 files changed, 136 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 0916bdd2fcb..c06c9f30a5f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -51,6 +51,10 @@ ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; +ModulePass *createAMDGPULowerIntrinsicsPass(); +void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); +extern char &AMDGPULowerIntrinsicsID; + void initializeSIFoldOperandsPass(PassRegistry &); extern char &SIFoldOperandsID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d9b0b500945..12c17c3e9eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -461,10 +461,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, // N > 4 stores on the same chain. GatherAllAliasesMaxDepth = 16; - // FIXME: Need to really handle these. - MaxStoresPerMemcpy = 4096; - MaxStoresPerMemmove = 4096; - MaxStoresPerMemset = 4096; + // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry + // about these during lowering. + MaxStoresPerMemcpy = 0xffffffff; + MaxStoresPerMemmove = 0xffffffff; + MaxStoresPerMemset = 0xffffffff; setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::SHL); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp new file mode 100644 index 00000000000..f5ed66bcebc --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -0,0 +1,123 @@ +//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+#define DEBUG_TYPE "amdgpu-lower-intrinsics"
+
+using namespace llvm;
+
+namespace {
+
+const unsigned MaxStaticSize = 1024; // Constant sizes above this are expanded.
+
+class AMDGPULowerIntrinsics : public ModulePass {
+public:
+  static char ID;
+
+  AMDGPULowerIntrinsics() : ModulePass(ID) { }
+  bool runOnModule(Module &M) override;
+  StringRef getPassName() const override {
+    return "AMDGPU Lower Intrinsics";
+  }
+};
+
+}
+
+char AMDGPULowerIntrinsics::ID = 0;
+
+char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
+
+INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE,
+                "Lower intrinsics", false, false)
+
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+static bool shouldExpandOperationWithSize(Value *Size) {
+  ConstantInt *CI = dyn_cast<ConstantInt>(Size); // Null for non-constant sizes.
+  return !CI || (CI->getZExtValue() > MaxStaticSize);
+}
+
+static bool expandMemIntrinsicUses(Function &F) {
+  Intrinsic::ID ID = F.getIntrinsicID();
+  bool Changed = false; // Returned as-is when no use gets expanded.
+
+  for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
+    Instruction *Inst = cast<Instruction>(*I);
+    ++I; // Advance first; Inst may be erased below.
+
+    switch (ID) {
+    case Intrinsic::memcpy: {
+      auto *Memcpy = cast<MemCpyInst>(Inst);
+      if (shouldExpandOperationWithSize(Memcpy->getLength())) {
+        expandMemCpyAsLoop(Memcpy);
+        Changed = true;
+        Memcpy->eraseFromParent();
+      }
+
+      break;
+    }
+    case Intrinsic::memmove: {
+      auto *Memmove = cast<MemMoveInst>(Inst);
+      if (shouldExpandOperationWithSize(Memmove->getLength())) {
+        expandMemMoveAsLoop(Memmove);
+        Changed = true;
+        Memmove->eraseFromParent();
+      }
+
+      break;
+    }
+    case Intrinsic::memset: {
+      auto *Memset = cast<MemSetInst>(Inst);
+      if (shouldExpandOperationWithSize(Memset->getLength())) {
+        expandMemSetAsLoop(Memset);
+        Changed = true;
+        Memset->eraseFromParent();
+      }
+
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
+  bool Changed = false;
+
+  for (Function &F : M) {
+    if (!F.isDeclaration())
+      continue; // Only declarations are inspected for intrinsic IDs.
+
+    switch (F.getIntrinsicID()) {
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::memset:
+      if (expandMemIntrinsicUses(F))
+        Changed = true;
+      break;
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+ModulePass *llvm::createAMDGPULowerIntrinsicsPass() {
+  return new AMDGPULowerIntrinsics();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 02fabf24408..1847d177137 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -108,6 +108,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSILoadStoreOptimizerPass(*PR); initializeAMDGPUAnnotateKernelFeaturesPass(*PR); initializeAMDGPUAnnotateUniformValuesPass(*PR); + initializeAMDGPULowerIntrinsicsPass(*PR); initializeAMDGPUPromoteAllocaPass(*PR); initializeAMDGPUCodeGenPreparePass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); @@ -472,6 +473,8 @@ void AMDGPUPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); + addPass(createAMDGPULowerIntrinsicsPass()); + // Function calls are not supported, so make sure we inline everything. 
addPass(createAMDGPUAlwaysInlinePass()); addPass(createAlwaysInlinerLegacyPass()); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 65853bb6a51..a1c263c8764 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUTargetObjectFile.cpp AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp + AMDGPULowerIntrinsics.cpp AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp AMDGPUUnifyMetadata.cpp |