summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 16
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/smrd.ll | 21
4 files changed, 61 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index f04efd71fa0..d2562615735 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -983,14 +983,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
return true;
}
-static bool isLegalMUBUFImmOffset(unsigned Imm) {
- return isUInt<12>(Imm);
-}
-
-static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
- return isLegalMUBUFImmOffset(Imm->getZExtValue());
-}
-
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,
@@ -1032,7 +1024,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = N0;
}
- if (isLegalMUBUFImmOffset(C1)) {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
@@ -1142,7 +1134,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
unsigned Imm = CAddr->getZExtValue();
- assert(!isLegalMUBUFImmOffset(Imm) &&
+ assert(!SIInstrInfo::isLegalMUBUFImmOffset(Imm) &&
"should have been selected by other pattern");
SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
@@ -1169,7 +1161,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isLegalMUBUFImmOffset(C1)) {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
@@ -1188,7 +1180,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &SOffset,
SDValue &Offset) const {
ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
- if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
+ if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
return false;
SDLoc DL(Addr);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 542bfdaffbf..5b851749a80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3712,13 +3712,43 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
+ auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
+ unsigned Offset = 0;
+
+ // See if we can extract an immediate offset by recognizing one of these:
+ // V_ADD_I32_e32 dst, imm, src1
+ // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
+ // V_ADD will be removed by "Remove dead machine instructions".
+ if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
+ const MachineOperand *Src =
+ getNamedOperand(*Add, AMDGPU::OpName::src0);
+
+ if (Src && Src->isReg()) {
+ auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+ if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+ Src = &Mov->getOperand(1);
+ }
+
+ if (Src) {
+ if (Src->isImm())
+ Offset = Src->getImm();
+ else if (Src->isCImm())
+ Offset = Src->getCImm()->getZExtValue();
+ }
+
+ if (Offset && isLegalMUBUFImmOffset(Offset))
+ VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
+ else
+ Offset = 0;
+ }
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
- .add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
+ .add(*VAddr) // vaddr
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
.addImm(0) // soffset
- .addImm(0) // offset
+ .addImm(Offset) // offset
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
.addImm(0) // slc
.addImm(0) // tfe
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f8de0efc5dd..5e84e0c9a17 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -860,6 +860,10 @@ public:
static bool isKillTerminator(unsigned Opcode);
const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
+
+ static bool isLegalMUBUFImmOffset(unsigned Imm) {
+ return isUInt<12>(Imm); // true iff Imm fits in 12 unsigned bits, i.e. 0..4095
+ }
};
namespace AMDGPU {
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index cc2f6ce76b5..ab7e7422d57 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -191,6 +191,27 @@ main_body:
ret float %r
}
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
+; GCN-NEXT: BB#
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
+define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
+main_body:
+ %off = add i32 %offset, 4095 ; largest constant that passes the 12-bit check; expected to fold into "offset:4095"
+ %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
+ ret float %r
+}
+
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
+; GCN-NEXT: BB#
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
+define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
+main_body:
+ %off = add i32 %offset, 4096 ; one past the 12-bit limit; the v_add is expected to remain and no offset is folded
+ %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
+ ret float %r
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
OpenPOWER on IntegriCloud