summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-09 22:00:42 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-09 22:00:42 +0000
commit: 0699ef39ce8498ea6ee625babfefabd3f648776a (patch)
tree: a5f44cc462c597ae5d70a8b33207b5e098e96033 /llvm/lib/Target/AMDGPU
parent: d7dd65ad7c0eb8ba0345154af23b10f2c4be4e8e (diff)
download: bcm5719-llvm-0699ef39ce8498ea6ee625babfefabd3f648776a.tar.gz
bcm5719-llvm-0699ef39ce8498ea6ee625babfefabd3f648776a.zip
AMDGPU: Add pass to expand memcpy/memmove/memset
llvm-svn: 294635
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 123
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/CMakeLists.txt | 1
5 files changed, 136 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0916bdd2fcb..c06c9f30a5f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -51,6 +51,10 @@ ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+ModulePass *createAMDGPULowerIntrinsicsPass();
+void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
+extern char &AMDGPULowerIntrinsicsID;
+
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d9b0b500945..12c17c3e9eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -461,10 +461,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// N > 4 stores on the same chain.
GatherAllAliasesMaxDepth = 16;
- // FIXME: Need to really handle these.
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
+ // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
+ // about these during lowering.
+ MaxStoresPerMemcpy = 0xffffffff;
+ MaxStoresPerMemmove = 0xffffffff;
+ MaxStoresPerMemset = 0xffffffff;
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::SHL);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
new file mode 100644
index 00000000000..f5ed66bcebc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -0,0 +1,123 @@
+//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+#define DEBUG_TYPE "amdgpu-lower-intrinsics"
+
+using namespace llvm;
+
+namespace {
+
+// Threshold used by shouldExpandOperationWithSize: a constant length operand
+// is only expanded when it is strictly greater than this value.
+const unsigned MaxStaticSize = 1024;
+
+// Module pass that expands memcpy/memmove/memset intrinsic calls in the IR.
+class AMDGPULowerIntrinsics : public ModulePass {
+public:
+ static char ID;
+
+ AMDGPULowerIntrinsics() : ModulePass(ID) { }
+ // Walks the function declarations of M and expands the uses of the memory
+ // intrinsics; the return value reports whether anything was rewritten.
+ bool runOnModule(Module &M) override;
+ // Human-readable name of this pass.
+ StringRef getPassName() const override {
+ return "AMDGPU Lower Intrinsics";
+ }
+};
+
+} // end anonymous namespace
+
+// Storage for the identifier handed to the ModulePass constructor.
+char AMDGPULowerIntrinsics::ID = 0;
+
+// Publishes the identifier through the reference declared in AMDGPU.h.
+char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
+
+// Registers the pass under the DEBUG_TYPE name ("amdgpu-lower-intrinsics").
+// NOTE(review): the two trailing flags are presumably INITIALIZE_PASS's
+// CFG-only and is-analysis flags -- confirm against PassSupport.h.
+INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE,
+ "Lower intrinsics", false, false)
+
+// Decides whether a memory intrinsic whose length operand is \p Size should
+// be expanded. A length that is not a ConstantInt is always expanded; a
+// constant length is expanded only when it exceeds MaxStaticSize.
+//
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+static bool shouldExpandOperationWithSize(Value *Size) {
+  if (ConstantInt *ConstLen = dyn_cast<ConstantInt>(Size))
+    return ConstLen->getZExtValue() > MaxStaticSize;
+
+  // Length unknown at compile time.
+  return true;
+}
+
+// Expands the calls to the memory intrinsic declared by \p F (memcpy, memmove
+// or memset) whose length operand satisfies shouldExpandOperationWithSize.
+// Each such call is handed to the matching expand*AsLoop helper and then
+// erased from its parent.
+//
+// \returns true if at least one call was expanded, false otherwise.
+static bool expandMemIntrinsicUses(Function &F) {
+  Intrinsic::ID ID = F.getIntrinsicID();
+  // Must start out false: when no user qualifies for expansion (or F has no
+  // users at all) nothing below assigns it, and returning an uninitialized
+  // bool would be undefined behavior and could falsely report a change.
+  bool Changed = false;
+
+  for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
+    Instruction *Inst = cast<Instruction>(*I);
+    // Step past this user first; Inst may be erased below.
+    ++I;
+
+    switch (ID) {
+    case Intrinsic::memcpy: {
+      auto *Memcpy = cast<MemCpyInst>(Inst);
+      if (shouldExpandOperationWithSize(Memcpy->getLength())) {
+        expandMemCpyAsLoop(Memcpy);
+        Changed = true;
+        Memcpy->eraseFromParent();
+      }
+
+      break;
+    }
+    case Intrinsic::memmove: {
+      auto *Memmove = cast<MemMoveInst>(Inst);
+      if (shouldExpandOperationWithSize(Memmove->getLength())) {
+        expandMemMoveAsLoop(Memmove);
+        Changed = true;
+        Memmove->eraseFromParent();
+      }
+
+      break;
+    }
+    case Intrinsic::memset: {
+      auto *Memset = cast<MemSetInst>(Inst);
+      if (shouldExpandOperationWithSize(Memset->getLength())) {
+        expandMemSetAsLoop(Memset);
+        Changed = true;
+        Memset->eraseFromParent();
+      }
+
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+// Pass entry point. Looks at every function declaration in \p M and, for the
+// memcpy, memmove and memset intrinsics, expands their uses through
+// expandMemIntrinsicUses. Returns whether any of those calls reported a
+// change.
+bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
+  bool MadeChange = false;
+
+  for (Function &Fn : M) {
+    // Functions with a body are skipped; only declarations are inspected.
+    if (!Fn.isDeclaration())
+      continue;
+
+    const Intrinsic::ID IID = Fn.getIntrinsicID();
+    const bool IsMemIntrinsic = IID == Intrinsic::memcpy ||
+                                IID == Intrinsic::memmove ||
+                                IID == Intrinsic::memset;
+    if (IsMemIntrinsic && expandMemIntrinsicUses(Fn))
+      MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+// Factory declared in AMDGPU.h: returns a newly allocated
+// AMDGPULowerIntrinsics pass instance.
+ModulePass *llvm::createAMDGPULowerIntrinsicsPass() {
+ return new AMDGPULowerIntrinsics();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 02fabf24408..1847d177137 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -108,6 +108,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
+ initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
@@ -472,6 +473,8 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAMDGPULowerIntrinsicsPass());
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 65853bb6a51..a1c263c8764 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetObjectFile.cpp
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
+ AMDGPULowerIntrinsics.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
AMDGPUUnifyMetadata.cpp
OpenPOWER on IntegriCloud