diff options
author | Ryan Taylor <rtayl@amd.com> | 2018-08-28 15:07:30 +0000 |
---|---|---|
committer | Ryan Taylor <rtayl@amd.com> | 2018-08-28 15:07:30 +0000 |
commit | 1f334d006254331ab4e1244c35fcdf2ed53266af (patch) | |
tree | da59b059718f70573eb11402f73adeda99206c6a /llvm/lib | |
parent | 4269d64b20e6edfffdd8504b3d8f3168079dab13 (diff) | |
download | bcm5719-llvm-1f334d006254331ab4e1244c35fcdf2ed53266af.tar.gz bcm5719-llvm-1f334d006254331ab4e1244c35fcdf2ed53266af.zip |
[AMDGPU] Add support for a16 modifier for gfx9
Summary:
Adding support for a16 for gfx9. The a16 bit replaces the r128 bit on gfx9.
Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50575
llvm-svn: 340831
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 37 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 58 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 |
9 files changed, 86 insertions, 45 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 8945aafc4b6..432b70c4502 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -242,6 +242,12 @@ def FeatureDPP : SubtargetFeature<"dpp", "Support DPP (Data Parallel Primitives) extension" >; +def FeatureR128A16 : SubtargetFeature<"r128-a16", + "HasR128A16", + "true", + "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9" +>; + def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", "HasIntClamp", "true", @@ -444,7 +450,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, - FeatureAddNoCarryInsts, FeatureScalarAtomics + FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16 ] >; @@ -703,6 +709,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; +def HasR128A16 : Predicate<"Subtarget->hasR128A16()">, + AssemblerPredicate<"FeatureR128A16">; + def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, AssemblerPredicate<"FeatureIntClamp">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 1097938ca25..2e6840c7915 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -197,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasSDWAMac(false), HasSDWAOutModsVOPC(false), HasDPP(false), + HasR128A16(false), HasDLInsts(false), D16PreservesUnusedBits(false), FlatAddressSpace(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 03809d8372a..db9ebd1a778 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -358,6 
+358,7 @@ protected: bool HasSDWAMac; bool HasSDWAOutModsVOPC; bool HasDPP; + bool HasR128A16; bool HasDLInsts; bool D16PreservesUnusedBits; bool FlatAddressSpace; @@ -791,6 +792,10 @@ public: return HasDPP; } + bool hasR128A16() const { + return HasR128A16; + } + bool enableSIScheduler() const { return EnableSIScheduler; } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 75deeb7bd67..fe10f7a0da9 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -156,7 +156,7 @@ public: ImmTyDMask, ImmTyUNorm, ImmTyDA, - ImmTyR128, + ImmTyR128A16, ImmTyLWE, ImmTyExpTgt, ImmTyExpCompr, @@ -290,7 +290,7 @@ public: bool isDMask() const { return isImmTy(ImmTyDMask); } bool isUNorm() const { return isImmTy(ImmTyUNorm); } bool isDA() const { return isImmTy(ImmTyDA); } - bool isR128() const { return isImmTy(ImmTyR128); } + bool isR128A16() const { return isImmTy(ImmTyR128A16); } bool isLWE() const { return isImmTy(ImmTyLWE); } bool isOff() const { return isImmTy(ImmTyOff); } bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } @@ -678,7 +678,7 @@ public: case ImmTyDMask: OS << "DMask"; break; case ImmTyUNorm: OS << "UNorm"; break; case ImmTyDA: OS << "DA"; break; - case ImmTyR128: OS << "R128"; break; + case ImmTyR128A16: OS << "R128A16"; break; case ImmTyLWE: OS << "LWE"; break; case ImmTyOff: OS << "Off"; break; case ImmTyExpTgt: OS << "ExpTgt"; break; @@ -1090,7 +1090,6 @@ private: bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMIMGDataSize(const MCInst &Inst); - bool validateMIMGR128(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; @@ -2445,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { return 
DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; } -bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) { - - const unsigned Opc = Inst.getOpcode(); - const MCInstrDesc &Desc = MII.get(Opc); - - if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) - return true; - - int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128); - assert(Idx != -1); - - bool R128 = (Inst.getOperand(Idx).getImm() != 0); - - return !R128 || hasMIMG_R128(); -} - bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -2495,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "integer clamping is not supported on this GPU"); return false; } - if (!validateMIMGR128(Inst)) { - Error(IDLoc, - "r128 modifier is not supported on this GPU"); - return false; - } // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. if (!validateMIMGD16(Inst)) { Error(IDLoc, @@ -3463,6 +3441,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, case AsmToken::Identifier: { StringRef Tok = Parser.getTok().getString(); if (Tok == Name) { + if (Tok == "r128" && isGFX9()) + Error(S, "r128 modifier is not supported on this GPU"); + if (Tok == "a16" && !isGFX9()) + Error(S, "a16 modifier is not supported on this GPU"); Bit = 1; Parser.Lex(); } else if (Tok.startswith("no") && Tok.endswith(Name)) { @@ -4705,7 +4687,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); @@ -4815,7 +4797,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, - {"r128", AMDGPUOperand::ImmTyR128, true, nullptr}, + {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, + {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 001d106cc72..76b19761ee1 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "da"); } -void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - printNamedBit(MI, OpNo, O, "r128"); + if (STI.hasFeature(AMDGPU::FeatureR128A16)) + printNamedBit(MI, OpNo, O, "a16"); + else + printNamedBit(MI, OpNo, O, "r128"); } void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index 75213720425..0ba74ca0f3e 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -80,7 +80,7 @@ private: raw_ostream &O); void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printR128(const MCInst *MI, unsigned OpNo, const 
MCSubtargetInfo &STI, + void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 44c2d366e46..1462682e761 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -141,7 +141,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm, let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, - R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da), + R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); @@ -199,7 +199,7 @@ class MIMG_Store_Helper <bits<7> op, string asm, let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, - R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da), + R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); @@ -252,7 +252,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc, let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, - R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da); + R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da); let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"; } @@ -316,7 +316,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc, let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, - R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da), + R128A16:$r128, 
TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 45441564efd..5916395acf4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4576,6 +4576,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, SelectionDAG &DAG) const { SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>(); const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); @@ -4585,6 +4587,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end()); bool IsD16 = false; + bool IsA16 = false; SDValue VData; int NumVDataDwords; unsigned AddrIdx; // Index of first address argument @@ -4660,23 +4663,59 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } } - unsigned NumVAddrs = BaseOpcode->NumExtraArgs + - (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + - (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + - (BaseOpcode->LodOrClampOrMip ? 1 : 0); + unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0; + unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0; + unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 
1 : 0; + unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients + + NumCoords + NumLCM; + unsigned NumMIVAddrs = NumVAddrs; + SmallVector<SDValue, 4> VAddrs; - for (unsigned i = 0; i < NumVAddrs; ++i) - VAddrs.push_back(Op.getOperand(AddrIdx + i)); // Optimize _L to _LZ when _L is zero if (LZMappingInfo) { if (auto ConstantLod = - dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) { + dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l - VAddrs.pop_back(); // remove 'lod' + NumMIVAddrs--; // remove 'lod' + } + } + } + + // Check for 16 bit addresses and pack if true. + unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs; + MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType(); + if (VAddrVT.getScalarType() == MVT::f16 && + ST->hasFeature(AMDGPU::FeatureR128A16)) { + IsA16 = true; + for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) { + SDValue AddrLo, AddrHi; + // Push back extra arguments. + if (i < DimIdx) { + AddrLo = Op.getOperand(i); + } else { + AddrLo = Op.getOperand(i); + // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, + // in 1D, derivatives dx/dh and dx/dv are packed with undef. 
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) || + ((NumGradients / 2) % 2 == 1 && + (i == DimIdx + (NumGradients / 2) - 1 || + i == DimIdx + NumGradients - 1))) { + AddrHi = DAG.getUNDEF(MVT::f16); + } else { + AddrHi = Op.getOperand(i + 1); + i++; + } + AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16, + {AddrLo, AddrHi}); + AddrLo = DAG.getBitcast(MVT::i32, AddrLo); } + VAddrs.push_back(AddrLo); } + } else { + for (unsigned i = 0; i < NumMIVAddrs; ++i) + VAddrs.push_back(Op.getOperand(AddrIdx + i)); } SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs); @@ -4725,7 +4764,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op, Ops.push_back(Unorm); Ops.push_back(GLC); Ops.push_back(SLC); - Ops.push_back(False); // r128 + Ops.push_back(IsA16 && // a16 or r128 + ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False); Ops.push_back(False); // tfe Ops.push_back(False); // lwe Ops.push_back(DimInfo->DA ? True : False); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f6ce31f008c..27bbaf3091b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -754,7 +754,7 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>; -def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>; +def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>; def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>; def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; |