summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td5
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp15
5 files changed, 32 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 32e83cb385f..ee2b415099b 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -338,6 +338,9 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
unsigned &SMovOp,
int64_t &Imm) {
+ if (Copy->getOpcode() != AMDGPU::COPY)
+ return false;
+
if (!MoveImm->isMoveImmediate())
return false;
@@ -564,7 +567,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
continue;
- case AMDGPU::COPY: {
+ case AMDGPU::COPY:
+ case AMDGPU::WQM: {
// If the destination register is a physical register there isn't really
// much we can do to fix this.
if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7abb4636d72..fecad1e1646 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3942,6 +3942,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
}
+ case Intrinsic::amdgcn_wqm: {
+ SDValue Src = Op.getOperand(1);
+ return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src),
+ 0);
+ }
default:
return Op;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1d884524bcd..dc4b9998786 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2666,6 +2666,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::COPY: return AMDGPU::COPY;
case AMDGPU::PHI: return AMDGPU::PHI;
case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
+ case AMDGPU::WQM: return AMDGPU::WQM;
case AMDGPU::S_MOV_B32:
return MI.getOperand(1).isReg() ?
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
@@ -3970,6 +3971,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::PHI:
case AMDGPU::REG_SEQUENCE:
case AMDGPU::INSERT_SUBREG:
+ case AMDGPU::WQM:
if (RI.hasVGPRs(NewDstRC))
return nullptr;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 50e806188a9..f20ce4d4b28 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -116,6 +116,11 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
+
+// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy
+// after the WQM pass processes them.
+def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
let usesCustomInserter = 1, SALU = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a613a220e29..62e1b7e84c4 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -136,6 +136,7 @@ private:
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
void printInfo();
@@ -162,6 +163,7 @@ private:
void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);
void lowerLiveMaskQueries(unsigned LiveMaskReg);
+ void lowerCopyInstrs();
public:
static char ID;
@@ -294,6 +296,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
markUsesWQM(MI, Worklist);
GlobalFlags |= StateWQM;
continue;
+ } else if (Opcode == AMDGPU::WQM) {
+ // The WQM intrinsic requires its output to have all the helper lanes
+ // correct, so we need it to be in WQM.
+ Flags = StateWQM;
+ LowerToCopyInstrs.push_back(&MI);
} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;
} else {
@@ -666,6 +673,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
}
}
+void SIWholeQuadMode::lowerCopyInstrs() {
+ for (MachineInstr *MI : LowerToCopyInstrs)
+ MI->setDesc(TII->get(AMDGPU::COPY));
+}
+
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
return false;
@@ -673,6 +685,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Instructions.clear();
Blocks.clear();
LiveMaskQueries.clear();
+ LowerToCopyInstrs.clear();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -708,6 +721,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
.addReg(AMDGPU::EXEC);
lowerLiveMaskQueries(LiveMaskReg);
+ lowerCopyInstrs();
// EntryMI may become invalid here
return true;
}
@@ -716,6 +730,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
DEBUG(printInfo());
lowerLiveMaskQueries(LiveMaskReg);
+ lowerCopyInstrs();
// Handle the general case
for (auto BII : Blocks)
OpenPOWER on IntegriCloud