summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 5
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 26
-rw-r--r-- llvm/lib/Target/AMDGPU/FLATInstructions.td 66
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td 12
5 files changed, 94 insertions, 25 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 251c2f9bb25..fea518d1871 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -138,7 +138,8 @@ private:
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
SDValue &ImmOffset, SDValue &VOffset) const;
- bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const;
+ bool SelectFlat(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -1315,8 +1316,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
SDValue &VAddr,
+ SDValue &Offset,
SDValue &SLC) const {
VAddr = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a7eac080f88..e54c887d609 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
.add(I.getOperand(1))
.add(I.getOperand(0))
- .addImm(0)
- .addImm(0);
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addImm(0); // slc
// Now that we selected an opcode, we need to constrain the register
@@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
.add(I.getOperand(0))
.addReg(PtrReg)
- .addImm(0)
- .addImm(0);
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addImm(0); // slc
bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
I.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 16e3b7b4ebe..871aa089b62 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -285,6 +285,9 @@ public:
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
+
+ bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
+ bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
@@ -886,6 +889,10 @@ public:
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
+ bool hasFlatOffsets() const {
+ return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
+ }
+
bool hasSGPR102_SGPR103() const {
return !isVI();
}
@@ -1034,6 +1041,8 @@ public:
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
+ AMDGPUOperand::Ptr defaultOffsetU12() const;
+ AMDGPUOperand::Ptr defaultOffsetS13() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -1970,6 +1979,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}
+ if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+ // FIXME: Produces error without correct column reported.
+ auto OpNum =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+ const auto &Op = Inst.getOperand(OpNum);
+ if (Op.getImm() != 0)
+ return Match_InvalidOperand;
+ }
+
return Match_Success;
}
@@ -3849,6 +3867,14 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8ba9efd42c7..0848853c00b 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 2, "SelectFlat">;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -55,6 +55,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
+ let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
// encoding fields
bits<8> vaddr;
@@ -63,10 +65,23 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<1> slc;
bits<1> glc;
+ // Only valid on gfx9
+ bits<1> lds = 0; // XXX - What does this actually do?
+ bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
+
+ // Signed offset. Highest bit ignored for flat and treated as 12-bit
+ // unsigned for flat accesses.
+ bits<13> offset;
+ bits<1> nv = 0; // XXX - What does this actually do?
+
// We don't use tfe right now, and it was removed in gfx9.
bits<1> tfe = 0;
- // 15-0 is reserved.
+ // Only valid on GFX9+
+ let Inst{12-0} = offset;
+ let Inst{13} = lds;
+ let Inst{15-14} = 0;
+
let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
let Inst{17} = slc;
let Inst{24-18} = op;
@@ -74,24 +89,30 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{39-32} = vaddr;
let Inst{47-40} = !if(ps.has_data, vdata, ?);
// 54-48 is reserved.
- let Inst{55} = tfe;
+ let Inst{55} = nv; // nv on GFX9+, TFE before.
let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
-class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
+class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
+ bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
- (ins VReg_64:$vaddr, GLC:$glc, slc:$slc),
- " $vdst, $vaddr$glc$slc"> {
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)),
+ " $vdst, $vaddr$offset$glc$slc"> {
let has_data = 0;
let mayLoad = 1;
}
-class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
+class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
+ bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
(outs),
- (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc),
- " $vaddr, $vdata$glc$slc"> {
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)),
+ " $vaddr, $vdata$offset$glc$slc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -103,12 +124,15 @@ multiclass FLAT_Atomic_Pseudo<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit HasSignedOffset = 0> {
def "" : FLAT_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
- " $vaddr, $vdata$slc",
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+ " $vaddr, $vdata$offset$slc",
[]>,
AtomicNoRet <NAME, 0> {
let mayLoad = 1;
@@ -121,10 +145,12 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
- " $vdst, $vaddr, $vdata glc$slc",
+ !if(HasSignedOffset,
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+ " $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
- (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>,
+ (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
AtomicNoRet <NAME, 1> {
let mayLoad = 1;
let mayStore = 1;
@@ -313,30 +339,30 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
- (inst $addr, 0, 0)
+ (inst $addr, 0, 0, 0)
>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
- (inst $addr, 1, 0)
+ (inst $addr, 0, 1, 0)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
- (inst $addr, $data, 0, 0)
+ (inst $addr, $data, 0, 0, 0)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
// atomic store follows atomic binop convention so the address comes
// first.
(node i64:$addr, vt:$data),
- (inst $addr, $data, 1, 0)
+ (inst $addr, $data, 0, 1, 0)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
(vt (node i64:$addr, data_vt:$data)),
- (inst $addr, $data, 0)
+ (inst $addr, $data, 0, 0)
>;
let Predicates = [isCIVI] in {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 445bf79a781..470a47b0244 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -492,11 +492,21 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
}
+class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -514,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
+def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
+def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
OpenPOWER on IntegriCloud