summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp 78
-rw-r--r-- llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp 28
3 files changed, 98 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b4a7a65386d..93dcd728a0c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -440,6 +440,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
+ setHasMultipleConditionRegisters(true);
// SI at least has hardware support for floating point exceptions, but no way
// of using or handling them is implemented. They are also optional in OpenCL
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 9f1178c40ab..7ed18f27e59 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -80,6 +80,11 @@ private:
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
+ void findMaskOperands(MachineInstr &MI, unsigned OpNo,
+ SmallVectorImpl<MachineOperand> &Src) const;
+
+ void combineMasks(MachineInstr &MI);
+
public:
static char ID;
@@ -336,6 +341,62 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
LIS->handleMove(*NewMI);
}
+// Returns replacement operands for a logical operation: either a single result
+// for exec, or two operands if the source was another equivalent operation.
+void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
+ SmallVectorImpl<MachineOperand> &Src) const {
+ MachineOperand &Op = MI.getOperand(OpNo);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+ Src.push_back(Op);
+ return;
+ }
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
+ if (!Def || Def->getParent() != MI.getParent() ||
+ !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
+ return;
+
+ // Make sure we do not modify exec between def and use.
+ // A copy with implicitly defined exec inserted earlier is an exception, it
+ // does not really modify exec.
+ for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
+ if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
+ !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
+ return;
+
+ for (const auto &SrcOp : Def->explicit_operands())
+ if (SrcOp.isUse() && (!SrcOp.isReg() ||
+ TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
+ SrcOp.getReg() == AMDGPU::EXEC))
+ Src.push_back(SrcOp);
+}
+
+// Search and combine pairs of equivalent instructions, like
+// S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
+// S_OR_B64 x, (S_OR_B64 x, y) => S_OR_B64 x, y
+// One of the operands is the exec mask.
+void SILowerControlFlow::combineMasks(MachineInstr &MI) {
+ assert(MI.getNumExplicitOperands() == 3);
+ SmallVector<MachineOperand, 4> Ops;
+ unsigned OpToReplace = 1;
+ findMaskOperands(MI, 1, Ops);
+ if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
+ findMaskOperands(MI, 2, Ops);
+ if (Ops.size() != 3) return;
+
+ unsigned UniqueOpndIdx;
+ if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
+ else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
+ else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
+ else return;
+
+ unsigned Reg = MI.getOperand(OpToReplace).getReg();
+ MI.RemoveOperand(OpToReplace);
+ MI.addOperand(Ops[UniqueOpndIdx]);
+ if (MRI->use_empty(Reg))
+ MRI->getUniqueVRegDef(Reg)->eraseFromParent();
+}
+
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
TII = ST.getInstrInfo();
@@ -351,9 +412,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
+ MachineBasicBlock::iterator I, Next, Last;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -386,9 +447,20 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitEndCf(MI);
break;
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ // Clean up bit manipulations on the exec mask
+ combineMasks(MI);
+ Last = I;
+ continue;
+
default:
- break;
+ Last = I;
+ continue;
}
+
+ // Replay newly inserted code to combine masks
+ Next = (Last == MBB.end()) ? MBB.begin() : Last;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 9e62980940b..be2e14fd462 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -100,12 +100,12 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
if (DstRC == &AMDGPU::VReg_1RegClass &&
TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
I1Defs.push_back(Dst.getReg());
- DebugLoc DL = MI.getDebugLoc();
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
if (DefInst->getOperand(1).isImm()) {
I1Defs.push_back(Dst.getReg());
@@ -129,10 +129,26 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .addOperand(Dst)
- .addOperand(Src)
- .addImm(0);
+ if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
+ DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
+ DefInst->getOperand(1).getImm() == 0 &&
+ DefInst->getOperand(2).getImm() != 0 &&
+ DefInst->getOperand(3).isReg() &&
+ TargetRegisterInfo::isVirtualRegister(
+ DefInst->getOperand(3).getReg()) &&
+ TRI->getCommonSubClass(
+ MRI.getRegClass(DefInst->getOperand(3).getReg()),
+ &AMDGPU::SGPR_64RegClass)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
+ .addOperand(Dst)
+ .addReg(AMDGPU::EXEC)
+ .addOperand(DefInst->getOperand(3));
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
+ .addOperand(Dst)
+ .addOperand(Src)
+ .addImm(0);
+ }
MI.eraseFromParent();
}
}
OpenPOWER on IntegriCloud