summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-04-14 21:58:15 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-04-14 21:58:15 +0000
commit4ac341c8b31ab34c7cb90eda91a91e78a11a8baf (patch)
tree195717ee1a4094630e5031af4dcf161e748169f8 /llvm
parent7900334dd530416b70fb04c8abb6f8c2c65da86d (diff)
downloadbcm5719-llvm-4ac341c8b31ab34c7cb90eda91a91e78a11a8baf.tar.gz
bcm5719-llvm-4ac341c8b31ab34c7cb90eda91a91e78a11a8baf.zip
AMDGPU: Directly emit m0 initialization with s_mov_b32
Currently what comes out of instruction selection is a register initialized to -1, and then copied to m0. MachineCSE doesn't consider copies, but we want these to be CSEed. This isn't much of a problem currently, because SIFoldOperands is run immediately after. This change avoids the regressions that would otherwise result from all of the copies to m0 being left in place when SIFoldOperands is run later. llvm-svn: 266377
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp33
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td18
2 files changed, 37 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ee7ad3293d9..52880a282d9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1103,10 +1103,18 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ StringRef(RegName) + "\"."));
}
-MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const {
-
+MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
+ case AMDGPU::SI_INIT_M0: {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addOperand(MI->getOperand(0));
+ MI->eraseFromParent();
+ break;
+ }
case AMDGPU::BRANCH:
return BB;
case AMDGPU::GET_GROUPSTATICSIZE: {
@@ -1395,19 +1403,18 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
SDValue V) const {
+ // We can't use S_MOV_B32 directly, because there is no way to specify m0 as
+ // the destination register.
+ //
// We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
// so we will end up with redundant moves to m0.
//
- // We can't use S_MOV_B32, because there is no way to specify m0 as the
- // destination register.
- //
- // We have to use them both. Machine cse will combine all the S_MOV_B32
- // instructions and the register coalescer eliminate the extra copies.
- SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
- return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
- SDValue(M0, 0), SDValue()); // Glue
- // A Null SDValue creates
- // a glue result.
+ // We use a pseudo to ensure we emit s_mov_b32 with m0 as the direct result.
+
+ // The pseudo has a chain result (0) and a glue result (1); return the chain.
+ SDNode *M0 = DAG.getMachineNode(AMDGPU::SI_INIT_M0, DL, MVT::Other, MVT::Glue,
+ V, Chain);
+ return SDValue(M0, 0);
}
SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4620ec05752..7b8a62bc8fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2014,7 +2014,23 @@ def SI_KILL : InstSI <
} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
-let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+// Used as an isel pseudo to directly emit initialization with an
+// s_mov_b32 rather than a copy of another initialized
+// register. MachineCSE skips copies, and we don't want to have to
+// fold operands before it runs.
+def SI_INIT_M0 : InstSI <
+ (outs),
+ (ins SSrc_32:$src), "", []> {
+ let Defs = [M0];
+ let usesCustomInserter = 1;
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let isAsCheapAsAMove = 1;
+ let SALU = 1;
+ let isReMaterializable = 1;
+}
+
+let Uses = [EXEC], Defs = [EXEC, VCC, M0] in {
class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
(outs VGPR_32:$dst, SReg_64:$temp),
OpenPOWER on IntegriCloud