summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-11-23 21:51:07 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-11-23 21:51:07 +0000
commita24d84beb9965f2f936363e2f30c58242b199af6 (patch)
treeaf4178147fc5cfb717035abe3426fee9ad837182
parent391c3ea9bc0a728dad00d4e64cac852bf6d70a91 (diff)
downloadbcm5719-llvm-a24d84beb9965f2f936363e2f30c58242b199af6.tar.gz
bcm5719-llvm-a24d84beb9965f2f936363e2f30c58242b199af6.zip
AMDGPU: Cleanup immediate folding code
Move code down to use, reorder to avoid hard to follow immediate folding logic. llvm-svn: 287818
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp126
1 files changed, 62 insertions, 64 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a54b628520c..e03913bff5a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -249,63 +249,16 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
return;
}
- bool FoldingImm = OpToFold.isImm();
- APInt Imm;
-
- if (FoldingImm) {
- unsigned UseReg = UseOp.getReg();
- const TargetRegisterClass *UseRC
- = TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI.getRegClass(UseReg) :
- TRI.getPhysRegClass(UseReg);
-
- Imm = APInt(64, OpToFold.getImm());
-
- const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
- const TargetRegisterClass *FoldRC =
- TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
-
- // Split 64-bit constants into 32-bits for folding.
- if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
- if (UseRC->getSize() != 8)
- return;
-
- if (UseOp.getSubReg() == AMDGPU::sub0) {
- Imm = Imm.getLoBits(32);
- } else {
- assert(UseOp.getSubReg() == AMDGPU::sub1);
- Imm = Imm.getHiBits(32);
- }
- }
-
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
- if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
-
- unsigned MovOp = TII->getMovOpcode(DestRC);
- if (MovOp == AMDGPU::COPY)
- return;
-
- UseMI->setDesc(TII->get(MovOp));
- CopiesToReplace.push_back(UseMI);
- }
- }
-
// Special case for REG_SEQUENCE: We can't fold literals into
// REG_SEQUENCE instructions, so we have to fold them into the
// uses of REG_SEQUENCE.
- if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ if (UseMI->isRegSequence()) {
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
for (MachineRegisterInfo::use_iterator
- RSUse = MRI.use_begin(RegSeqDstReg),
- RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+ RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+ RSUse != RSE; ++RSUse) {
MachineInstr *RSUseMI = RSUse->getParent();
if (RSUse->getSubReg() != RegSeqDstSubReg)
@@ -314,29 +267,74 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
CopiesToReplace, TII, TRI, MRI);
}
+
return;
}
- const MCInstrDesc &UseDesc = UseMI->getDesc();
- // Don't fold into target independent nodes. Target independent opcodes
- // don't have defined register classes.
- if (UseDesc.isVariadic() ||
- UseDesc.OpInfo[UseOpIdx].RegClass == -1)
- return;
+ bool FoldingImm = OpToFold.isImm();
- if (FoldingImm) {
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (FoldingImm && UseMI->isCopy()) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ return;
+
+ UseMI->setDesc(TII->get(MovOp));
+ CopiesToReplace.push_back(UseMI);
+ } else {
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+ return;
+ }
+
+ if (!FoldingImm) {
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunites. The shrink operands pass
+ // already does this.
return;
}
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+ APInt Imm(64, OpToFold.getImm());
+
+ const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
+ const TargetRegisterClass *FoldRC =
+ TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getPhysRegClass(UseReg);
+
+ if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
+ return;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
- // already does this.
- return;
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
}
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
OpenPOWER on IntegriCloud