summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorRyan Taylor <rtayl@amd.com>2018-08-28 15:07:30 +0000
committerRyan Taylor <rtayl@amd.com>2018-08-28 15:07:30 +0000
commit1f334d006254331ab4e1244c35fcdf2ed53266af (patch)
treeda59b059718f70573eb11402f73adeda99206c6a /llvm/lib
parent4269d64b20e6edfffdd8504b3d8f3168079dab13 (diff)
downloadbcm5719-llvm-1f334d006254331ab4e1244c35fcdf2ed53266af.tar.gz
bcm5719-llvm-1f334d006254331ab4e1244c35fcdf2ed53266af.zip
[AMDGPU] Add support for a16 modifiear for gfx9
Summary: Adding support for a16 for gfx9. A16 bit replaces r128 bit for gfx9. Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841 Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D50575 llvm-svn: 340831
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td8
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp58
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td2
9 files changed, 86 insertions, 45 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8945aafc4b6..432b70c4502 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -242,6 +242,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureR128A16 : SubtargetFeature<"r128-a16",
+ "HasR128A16",
+ "true",
+ "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -444,7 +450,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics
+ FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
@@ -703,6 +709,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
+ AssemblerPredicate<"FeatureR128A16">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1097938ca25..2e6840c7915 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -197,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasR128A16(false),
HasDLInsts(false),
D16PreservesUnusedBits(false),
FlatAddressSpace(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 03809d8372a..db9ebd1a778 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -358,6 +358,7 @@ protected:
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
bool D16PreservesUnusedBits;
bool FlatAddressSpace;
@@ -791,6 +792,10 @@ public:
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 75deeb7bd67..fe10f7a0da9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -156,7 +156,7 @@ public:
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
- ImmTyR128,
+ ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
@@ -290,7 +290,7 @@ public:
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
- bool isR128() const { return isImmTy(ImmTyR128); }
+ bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -678,7 +678,7 @@ public:
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
- case ImmTyR128: OS << "R128"; break;
+ case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1090,7 +1090,6 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
- bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -2445,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
-bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
-
- const unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &Desc = MII.get(Opc);
-
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
-
- int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
- assert(Idx != -1);
-
- bool R128 = (Inst.getOperand(Idx).getImm() != 0);
-
- return !R128 || hasMIMG_R128();
-}
-
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2495,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"integer clamping is not supported on this GPU");
return false;
}
- if (!validateMIMGR128(Inst)) {
- Error(IDLoc,
- "r128 modifier is not supported on this GPU");
- return false;
- }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
@@ -3463,6 +3441,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
+ if (Tok == "r128" && isGFX9())
+ Error(S, "r128 modifier is not supported on this GPU");
+ if (Tok == "a16" && !isGFX9())
+ Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -4705,7 +4687,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4815,7 +4797,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
- {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
+ {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 001d106cc72..76b19761ee1 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "da");
}
-void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "r128");
+ if (STI.hasFeature(AMDGPU::FeatureR128A16))
+ printNamedBit(MI, OpNo, O, "a16");
+ else
+ printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 75213720425..0ba74ca0f3e 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -80,7 +80,7 @@ private:
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 44c2d366e46..1462682e761 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -141,7 +141,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -199,7 +199,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +252,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
@@ -316,7 +316,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 45441564efd..5916395acf4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4576,6 +4576,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4585,6 +4587,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
unsigned AddrIdx; // Index of first address argument
@@ -4660,23 +4663,59 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
+ NumCoords + NumLCM;
+ unsigned NumMIVAddrs = NumVAddrs;
+
SmallVector<SDValue, 4> VAddrs;
- for (unsigned i = 0; i < NumVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrs.pop_back(); // remove 'lod'
+ NumMIVAddrs--; // remove 'lod'
+ }
+ }
+ }
+
+ // Check for 16 bit addresses and pack if true.
+ unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ if (VAddrVT.getScalarType() == MVT::f16 &&
+ ST->hasFeature(AMDGPU::FeatureR128A16)) {
+ IsA16 = true;
+ for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
+ SDValue AddrLo, AddrHi;
+ // Push back extra arguments.
+ if (i < DimIdx) {
+ AddrLo = Op.getOperand(i);
+ } else {
+ AddrLo = Op.getOperand(i);
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
+ // in 1D, derivatives dx/dh and dx/dv are packed with undef.
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (i == DimIdx + (NumGradients / 2) - 1 ||
+ i == DimIdx + NumGradients - 1))) {
+ AddrHi = DAG.getUNDEF(MVT::f16);
+ } else {
+ AddrHi = Op.getOperand(i + 1);
+ i++;
+ }
+ AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
+ {AddrLo, AddrHi});
+ AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
}
+ VAddrs.push_back(AddrLo);
}
+ } else {
+ for (unsigned i = 0; i < NumMIVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
}
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
@@ -4725,7 +4764,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(False); // r128
+ Ops.push_back(IsA16 && // a16 or r128
+ ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
Ops.push_back(False); // tfe
Ops.push_back(False); // lwe
Ops.push_back(DimInfo->DA ? True : False);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f6ce31f008c..27bbaf3091b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -754,7 +754,7 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
-def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
OpenPOWER on IntegriCloud