summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 31
-rw-r--r-- llvm/lib/Target/AMDGPU/BUFInstructions.td 8
-rw-r--r-- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp 16
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp 5
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h 2
-rw-r--r-- llvm/lib/Target/AMDGPU/MIMGInstructions.td 574
-rw-r--r-- llvm/lib/Target/AMDGPU/SIDefines.h 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 11
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrFormats.td 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.h 8
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td 18
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h 3
-rw-r--r-- llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory_clause.mir 24
-rw-r--r-- llvm/test/MC/AMDGPU/mimg.s 17
16 files changed, 319 insertions, 419 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 05c7d5d8485..438ea0e7bb4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -109,8 +109,7 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
// subtarget has UnpackedD16VMem feature.
// TODO: remove this when we discard GFX80 encoding.
- if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
- && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
+ if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
Gen = SIEncodingFamily::GFX80;
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 18cc67f3fc4..57c552da19f 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2301,10 +2301,6 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
return true;
- // Gather4 instructions do not need validation: dst size is hardcoded.
- if (Desc.TSFlags & SIInstrFlags::Gather4)
- return true;
-
int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
@@ -2319,9 +2315,12 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
if (DMask == 0)
DMask = 1;
- unsigned DataSize = countPopulation(DMask);
- if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
- DataSize = (DataSize + 1) / 2;
+ unsigned DataSize =
+ (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
+ if (hasPackedD16()) {
+ int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+ if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+ DataSize = (DataSize + 1) / 2;
}
return (VDataSize / 4) == DataSize + TFESize;
@@ -2389,10 +2388,14 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
return true;
- if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
- return true;
- return !isCI() && !isSI();
+ int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+ if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
+ if (isCI() || isSI())
+ return false;
+ }
+
+ return true;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
@@ -4261,6 +4264,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -4287,6 +4291,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
+}
+
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
@@ -4389,6 +4397,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
{"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
+ {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
@@ -5094,8 +5103,6 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
case MCK_glc:
return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
- case MCK_d16:
- return Operand.isD16() ? Match_Success : Match_InvalidOperand;
case MCK_idxen:
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen:
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 18e28b90e06..b87c47a6b9e 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -720,7 +720,7 @@ defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores <
"buffer_store_format_xyzw", VReg_128
>;
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
"buffer_load_format_d16_x", VGPR_32
>;
@@ -747,7 +747,7 @@ let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
>;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
"buffer_load_format_d16_x", VGPR_32
>;
@@ -990,7 +990,7 @@ defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy",
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
@@ -1001,7 +1001,7 @@ let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index db725dfc6d0..741cf0ea6cd 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -289,10 +289,6 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
// as if it has 1 dword, which could be not really so.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
- return MCDisassembler::Success;
- }
-
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst);
@@ -304,22 +300,25 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::tfe);
+ int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::d16);
assert(VDataIdx != -1);
assert(DMaskIdx != -1);
assert(TFEIdx != -1);
bool IsAtomic = (VDstIdx != -1);
+ bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
if (DMask == 0)
return MCDisassembler::Success;
- unsigned DstSize = countPopulation(DMask);
+ unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
if (DstSize == 1)
return MCDisassembler::Success;
- bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
+ bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
if (D16 && AMDGPU::hasPackedD16(STI)) {
DstSize = (DstSize + 1) / 2;
}
@@ -335,6 +334,11 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
}
if (NewOpcode == -1) return MCDisassembler::Success;
+ } else if (IsGather4) {
+ if (D16 && AMDGPU::hasPackedD16(STI))
+ NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
+ else
+ return MCDisassembler::Success;
} else {
NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index a11e20bf046..51f926d0a8e 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -217,6 +217,11 @@ void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "lwe");
}
+void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printNamedBit(MI, OpNo, O, "d16");
+}
+
void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 8a51b628fe5..1bce0bbd5dc 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -84,6 +84,8 @@ private:
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printD16(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printExpCompr(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 702660c00b5..645bc404655 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -17,6 +17,11 @@ class MIMG_Atomic_Size <string op, bit is32Bit> {
int AtomicSize = !if(is32Bit, 1, 2);
}
+class MIMG_Gather_Size <string op, int channels> {
+ string Op = op;
+ int Channels = channels;
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -37,125 +42,88 @@ class MIMG_Helper <dag outs, dag ins, string asm,
class MIMG_NoSampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
- bit d16_bit=0,
- string dns=""> : MIMG_Helper <
- (outs dst_rc:$vdata),
- (ins addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
- asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
- dns>, MIMGe<op> {
+ bit has_d16,
+ string dns="">
+ : MIMG_Helper <(outs dst_rc:$vdata),
+ !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+ DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+ R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ !if(has_d16, (ins D16:$d16), (ins))),
+ asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+ #!if(has_d16, "$d16", ""),
+ dns>,
+ MIMGe<op> {
let ssamp = 0;
- let D16 = d16;
-}
-multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, bit d16_bit,
- string suffix> {
- def NAME # _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit,
- !if(!eq(channels, 1), "AMDGPU", "")>,
- MIMG_Mask<asm#"_V1"#suffix, channels>;
- def NAME # _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>,
- MIMG_Mask<asm#"_V2"#suffix, channels>;
- def NAME # _V3 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, d16_bit>,
- MIMG_Mask<asm#"_V3"#suffix, channels>;
- def NAME # _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>,
- MIMG_Mask<asm#"_V4"#suffix, channels>;
+ let HasD16 = has_d16;
+ let d16 = !if(HasD16, ?, 0);
}
multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels> {
- defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">;
-
- let d16 = 1 in {
- let SubtargetPredicate = HasPackedD16VMem in {
- defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">;
- } // End HasPackedD16VMem.
-
- let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
- defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">;
- } // End HasUnpackedD16VMem.
- } // End d16 = 1.
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
- defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
- defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
- defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
+ RegisterClass dst_rc,
+ int channels, bit has_d16> {
+ def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, has_d16,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, has_d16>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, has_d16>,
+ MIMG_Mask<asm#"_V3", channels>;
+ def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, has_d16>,
+ MIMG_Mask<asm#"_V4", channels>;
}
-multiclass MIMG_PckNoSampler <bits<7> op, string asm> {
- defm NAME # _V1 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
- defm NAME # _V2 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
- defm NAME # _V3 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
- defm NAME # _V4 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16> {
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1, has_d16>;
+ defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2, has_d16>;
+ defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3, has_d16>;
+ defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4, has_d16>;
}
class MIMG_Store_Helper <bits<7> op, string asm,
RegisterClass data_rc,
RegisterClass addr_rc,
- bit d16_bit=0,
- string dns = ""> : MIMG_Helper <
- (outs),
- (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
- asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> {
+ bit has_d16,
+ string dns = "">
+ : MIMG_Helper <(outs),
+ !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+ R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ !if(has_d16, (ins D16:$d16), (ins))),
+ asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+ #!if(has_d16, "$d16", ""),
+ dns>,
+ MIMGe<op> {
let ssamp = 0;
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let hasPostISelHook = 0;
let DisableWQM = 1;
- let D16 = d16;
-}
-multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm,
- RegisterClass data_rc,
- int channels, bit d16_bit,
- string suffix> {
- def NAME # _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit,
- !if(!eq(channels, 1), "AMDGPU", "")>,
- MIMG_Mask<asm#"_V1"#suffix, channels>;
- def NAME # _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>,
- MIMG_Mask<asm#"_V2"#suffix, channels>;
- def NAME # _V3 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_96, d16_bit>,
- MIMG_Mask<asm#"_V3"#suffix, channels>;
- def NAME # _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>,
- MIMG_Mask<asm#"_V4"#suffix, channels>;
+ let HasD16 = has_d16;
+ let d16 = !if(HasD16, ?, 0);
}
multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
RegisterClass data_rc,
- int channels> {
- defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">;
-
- let d16 = 1 in {
- let SubtargetPredicate = HasPackedD16VMem in {
- defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">;
- } // End HasPackedD16VMem.
-
- let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
- defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">;
- } // End HasUnpackedD16VMem.
- } // End d16 = 1.
+ int channels, bit has_d16> {
+ def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, has_d16,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64, has_d16>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96, has_d16>,
+ MIMG_Mask<asm#"_V3", channels>;
+ def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128, has_d16>,
+ MIMG_Mask<asm#"_V4", channels>;
}
-multiclass MIMG_Store <bits<7> op, string asm> {
- defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
- defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
- defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckStore <bits<7> op, string asm> {
- defm NAME # _V1 : MIMG_Store_Addr_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
- defm NAME # _V2 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
- defm NAME # _V3 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
- defm NAME # _V4 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+multiclass MIMG_Store <bits<7> op, string asm, bit has_d16> {
+ defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1, has_d16>;
+ defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2, has_d16>;
+ defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3, has_d16>;
+ defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4, has_d16>;
}
class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
@@ -177,23 +145,27 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
}
class MIMG_Atomic_Real_si<mimg op, string name, string asm,
- RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
- MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
- SIMCInstr<name, SIEncodingFamily.SI>,
- MIMGe<op.SI> {
+ RegisterClass data_rc, RegisterClass addr_rc,
+ bit enableDasm>
+ : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
+ SIMCInstr<name, SIEncodingFamily.SI>,
+ MIMGe<op.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
let DisableDecoder = DisableSIDecoder;
+ let d16 = 0;
}
class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
- RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
- MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
- SIMCInstr<name, SIEncodingFamily.VI>,
- MIMGe<op.VI> {
+ RegisterClass data_rc, RegisterClass addr_rc,
+ bit enableDasm>
+ : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
+ SIMCInstr<name, SIEncodingFamily.VI>,
+ MIMGe<op.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
let DisableDecoder = DisableVIDecoder;
+ let d16 = 0;
}
multiclass MIMG_Atomic_Helper_m <mimg op,
@@ -245,59 +217,46 @@ multiclass MIMG_Atomic <mimg op, string asm,
class MIMG_Sampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc,
- bit wqm,
- bit d16_bit=0,
- string dns=""> : MIMG_Helper <
- (outs dst_rc:$vdata),
- (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
- asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
- dns>, MIMGe<op> {
+ bit wqm, bit has_d16,
+ string dns="">
+ : MIMG_Helper <(outs dst_rc:$vdata),
+ !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+ R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ !if(has_d16, (ins D16:$d16), (ins))),
+ asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+ #!if(has_d16, "$d16", ""),
+ dns>,
+ MIMGe<op> {
let WQM = wqm;
- let D16 = d16;
-}
-multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, bit wqm,
- bit d16_bit, string suffix> {
- def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit,
- !if(!eq(channels, 1), "AMDGPU", "")>,
- MIMG_Mask<asm#"_V1"#suffix, channels>;
- def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
- MIMG_Mask<asm#"_V2"#suffix, channels>;
- def _V3 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>,
- MIMG_Mask<asm#"_V3"#suffix, channels>;
- def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
- MIMG_Mask<asm#"_V4"#suffix, channels>;
- def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
- MIMG_Mask<asm#"_V8"#suffix, channels>;
- def _V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
- MIMG_Mask<asm#"_V16"#suffix, channels>;
+ let HasD16 = has_d16;
+ let d16 = !if(HasD16, ?, 0);
}
multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- int channels, bit wqm> {
- defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">;
-
- let d16 = 1 in {
- let SubtargetPredicate = HasPackedD16VMem in {
- defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">;
- } // End HasPackedD16VMem.
-
- let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
- defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
- } // End HasUnpackedD16VMem.
- } // End d16 = 1.
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm>;
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm>;
- defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm>;
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm>;
+ int channels, bit wqm, bit has_d16> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, has_d16,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, has_d16>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V3 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, has_d16>,
+ MIMG_Mask<asm#"_V3", channels>;
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, has_d16>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, has_d16>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, has_d16>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm = 0, bit has_d16 = 1> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm, has_d16>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm, has_d16>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm, has_d16>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm, has_d16>;
}
multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>;
@@ -306,14 +265,14 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc,
bit wqm,
- bit d16_bit=0,
- string dns=""> : MIMG <
- (outs dst_rc:$vdata),
- (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
- asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
- []>, MIMGe<op> {
+ string dns="">
+ : MIMG <(outs dst_rc:$vdata),
+ (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+ R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da, D16:$d16),
+ asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da$d16",
+ []>,
+ MIMGe<op> {
let mayLoad = 1;
let mayStore = 0;
@@ -327,7 +286,7 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
let Gather4 = 1;
let hasPostISelHook = 0;
let WQM = wqm;
- let D16 = d16;
+ let HasD16 = 1;
let DecoderNamespace = dns;
let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
@@ -336,29 +295,25 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- bit wqm, bit d16_bit,
- string prefix,
- string suffix> {
- def prefix # _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit, "AMDGPU">;
- def prefix # _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>;
- def prefix # _V3 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>;
- def prefix # _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>;
- def prefix # _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>;
- def prefix # _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>;
+ int channels, bit wqm> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm,
+ !if(!eq(channels, 4), "AMDGPU", "")>,
+ MIMG_Gather_Size<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+ MIMG_Gather_Size<asm#"_V2", channels>;
+ def _V3 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm>,
+ MIMG_Gather_Size<asm#"_V3", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+ MIMG_Gather_Size<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+ MIMG_Gather_Size<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+ MIMG_Gather_Size<asm#"_V16", channels>;
}
multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
- defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_128, wqm, 0, "_V4", "">;
-
- let d16 = 1 in {
- let AssemblerPredicate = HasPackedD16VMem in {
- defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_64, wqm, 1, "_V2", "_D16">;
- } // End HasPackedD16VMem.
-
- let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
- defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_128, wqm, 1, "_V4", "_D16_gfx80">;
- } // End HasUnpackedD16VMem.
- } // End d16 = 1.
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, wqm>; /* for packed D16 only */
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, wqm>;
}
multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
@@ -367,19 +322,19 @@ multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
// MIMG Instructions
//===----------------------------------------------------------------------===//
let SubtargetPredicate = isGCN in {
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-defm IMAGE_LOAD_PCK : MIMG_PckNoSampler <0x00000002, "image_load_pck">;
-defm IMAGE_LOAD_PCK_SGN : MIMG_PckNoSampler <0x00000003, "image_load_pck_sgn">;
-defm IMAGE_LOAD_MIP_PCK : MIMG_PckNoSampler <0x00000004, "image_load_mip_pck">;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_PckNoSampler <0x00000005, "image_load_mip_pck_sgn">;
-defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
-defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
-defm IMAGE_STORE_PCK : MIMG_PckStore <0x0000000a, "image_store_pck">;
-defm IMAGE_STORE_MIP_PCK : MIMG_PckStore <0x0000000b, "image_store_mip_pck">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip", 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <0x00000002, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <0x00000003, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <0x00000004, "image_load_mip_pck", 0>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <0x00000005, "image_load_mip_pck_sgn", 0>;
+defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip", 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <0x0000000a, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <0x0000000b, "image_store_mip_pck", 0>;
let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo", 0>;
}
defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
@@ -457,7 +412,7 @@ defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl
defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod", 1, 0>;
}
defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
@@ -519,13 +474,13 @@ class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
}
class ImageDimPattern<AMDGPUImageDimIntrinsic I,
- string dop, ValueType dty,
+ string dop, ValueType dty, bit d16,
string suffix = ""> : GCNPat<(undef), (undef)> {
list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
getDwordsType AddrDwords = getDwordsType<!size(AddrArgs)>;
- Instruction MI =
- !cast<Instruction>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
+ MIMG MI =
+ !cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
// DAG fragment to match data arguments (vdata for store/atomic, dmask
// for non-atomic).
@@ -581,7 +536,8 @@ class ImageDimPattern<AMDGPUImageDimIntrinsic I,
0, /* r128 */
0, /* tfe */
0 /*(as_i1imm $lwe)*/,
- { I.P.Dim.DA }));
+ { I.P.Dim.DA }),
+ !if(MI.HasD16, (MI d16), (MI)));
let ResultInstrs = [
!if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
];
@@ -589,23 +545,23 @@ class ImageDimPattern<AMDGPUImageDimIntrinsic I,
foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimGetResInfoIntrinsics) in {
- def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32>;
- def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32>;
- def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32>;
+ def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32, 0>;
+ def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32, 0>;
+ def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32, 0>;
}
multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
AMDGPUImageDimIntrinsic d16helper> {
let SubtargetPredicate = HasUnpackedD16VMem in {
- def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, "_D16_gfx80">;
- def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, "_D16_gfx80">;
- def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, "_D16_gfx80">;
+ def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, 1>;
+ def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, 1>;
+ def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- def _packed_v1 : ImageDimPattern<I, "_V1", f16, "_D16">;
- def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, "_D16">;
- def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, "_D16">;
+ def _packed_v1 : ImageDimPattern<I, "_V1", f16, 1>;
+ def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, 1>;
+ def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, 1>;
} // End HasPackedD16VMem.
}
@@ -627,7 +583,7 @@ foreach intr = AMDGPUImageDimIntrinsics in {
}
foreach intr = AMDGPUImageDimGatherIntrinsics in {
- def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32>;
+ def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32, 0>;
def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
@@ -643,16 +599,16 @@ foreach intr = AMDGPUImageDimGatherIntrinsics in {
def intr#_unpacked_v4 :
ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
"int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
- "_V4", v4i32, "_D16_gfx80">;
+ "_V4", v4i32, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, "_D16">;
+ def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, 1>;
} // End HasPackedD16VMem.
}
foreach intr = AMDGPUImageDimAtomicIntrinsics in {
- def intr#_pat1 : ImageDimPattern<intr, "_V1", i32>;
+ def intr#_pat1 : ImageDimPattern<intr, "_V1", i32, 0>;
}
/********** ======================= **********/
@@ -663,154 +619,160 @@ foreach intr = AMDGPUImageDimAtomicIntrinsics in {
// TODO:
// 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
// 2. Add A16 support when we pass address of half type.
-multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode,
+ ValueType dt, ValueType vt, bit d16> {
def : GCNPat<
(dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
i1:$slc, i1:$lwe, i1:$da)),
- (opcode $addr, $rsrc, $sampler,
- (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
- 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm),
+ (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe),
+ (as_i1imm $da)),
+ !if(opcode.HasD16, (opcode d16), (opcode)))
>;
}
-multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
- defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>;
- defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>;
- defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>;
- defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>;
- defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>;
+multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode,
+ ValueType dt, bit d16> {
+ defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32, d16>;
+ defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32, d16>;
+ defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32, d16>;
+ defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32, d16>;
+ defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32, d16>;
}
// ImageSample patterns.
multiclass ImageSamplePatterns<SDPatternOperator name, string opcode> {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
} // End HasPackedD16VMem.
}
// ImageSample alternative patterns for illegal vector half Types.
multiclass ImageSampleAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
} // End HasUnpackedD16VMem.
}
// ImageGather4 patterns.
multiclass ImageGather4Patterns<SDPatternOperator name, string opcode> {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
let SubtargetPredicate = HasPackedD16VMem in {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
} // End HasPackedD16VMem.
}
// ImageGather4 alternative patterns for illegal vector half Types.
multiclass ImageGather4AltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+ defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
} // End HasUnpackedD16VMem.
-
}
// ImageLoad for amdgcn.
-multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode,
+ ValueType dt, ValueType vt, bit d16> {
def : GCNPat <
(dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
i1:$da)),
- (opcode $addr, $rsrc,
- (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
- 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+ (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+ !if(opcode.HasD16, (opcode d16), (opcode)))
>;
}
-multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
- defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
- defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
- defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode,
+ ValueType dt, bit d16> {
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
}
// ImageLoad patterns.
// TODO: support v3f32.
multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>;
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
} // End HasPackedD16VMem.
}
// ImageLoad alternative patterns for illegal vector half Types.
multiclass ImageLoadAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
- defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
} // End HasUnPackedD16VMem.
}
// ImageStore for amdgcn.
-multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode,
+ ValueType dt, ValueType vt, bit d16> {
def : GCNPat <
(name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
i1:$lwe, i1:$da),
- (opcode $data, $addr, $rsrc,
- (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
- 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+ (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+ !if(opcode.HasD16, (opcode d16), (opcode)))
>;
}
-multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
- defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
- defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
- defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode,
+ ValueType dt, bit d16> {
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
}
// ImageStore patterns.
// TODO: support v3f32.
multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
} // End HasPackedD16VMem.
}
// ImageStore alternative patterns.
multiclass ImageStoreAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">;
- defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, 1>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
} // End HasPackedD16VMem.
}
@@ -1030,83 +992,3 @@ defm : ImageAtomicPatterns<int_amdgcn_image_atomic_or, "IMAGE_ATOMIC_OR">;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_xor, "IMAGE_ATOMIC_XOR">;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_inc, "IMAGE_ATOMIC_INC">;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_dec, "IMAGE_ATOMIC_DEC">;
-
-/* SIsample for simple 1D texture lookup */
-def : GCNPat <
- (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
- (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
- (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-class SampleShadowPattern<SDNode name, MIMG opcode,
- ValueType vt> : GCNPat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleShadowArrayPattern<SDNode name, MIMG opcode,
- ValueType vt> : GCNPat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
- MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
-MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
- def : SamplePattern <SIsample, sample, addr_type>;
- def : SampleRectPattern <SIsample, sample, addr_type>;
- def : SampleArrayPattern <SIsample, sample, addr_type>;
- def : SampleShadowPattern <SIsample, sample_c, addr_type>;
- def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
-
- def : SamplePattern <SIsamplel, sample_l, addr_type>;
- def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
- def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
- def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
-
- def : SamplePattern <SIsampleb, sample_b, addr_type>;
- def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
- def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
- def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
-
- def : SamplePattern <SIsampled, sample_d, addr_type>;
- def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
- def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
- def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
-}
-
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
- IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
- IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
- IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
- v2i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
- IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
- IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
- IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
- v4i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
- IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
- IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
- IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
- v8i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
- IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
- IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
- IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
- v16i32>;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index ea4447f5058..a6d28d6999e 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -87,8 +87,8 @@ enum : uint64_t {
// Is a packed VOP3P instruction.
IsPacked = UINT64_C(1) << 49,
- // "d16" bit set or not.
- D16 = UINT64_C(1) << 50
+ // Is a D16 buffer instruction.
+ D16Buf = UINT64_C(1) << 50
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d6199498c68..10005eb83a3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7762,9 +7762,16 @@ static unsigned SubIdx2Lane(unsigned Idx) {
/// Adjust the writemask of MIMG instructions
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Subtract 1 because the vdata output is not a MachineSDNode operand.
+ int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
+ if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
+ return Node; // not implemented for D16
+
SDNode *Users[4] = { nullptr };
unsigned Lane = 0;
- unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
+ unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
bool HasChain = Node->getNumValues() > 1;
@@ -7936,7 +7943,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
- !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
+ !TII->isGather4(Opcode)) {
return adjustWritemask(Node, DAG);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 5fbee37e3ed..b882de3fec0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -118,8 +118,8 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a packed VOP3P instruction
field bit IsPacked = 0;
- // This bit indicates that this is a D16 instruction.
- field bit D16 = 0;
+ // This bit indicates that this is a D16 buffer instruction.
+ field bit D16Buf = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
@@ -176,7 +176,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{49} = IsPacked;
- let TSFlags{50} = D16;
+ let TSFlags{50} = D16Buf;
let SchedRW = [Write32Bit];
@@ -255,7 +255,7 @@ class MIMGe <bits<7> op> : Enc64 {
bits<1> tfe;
bits<1> lwe;
bits<1> slc;
- bits<1> d16 = 0;
+ bit d16;
bits<8> vaddr;
bits<7> srsrc;
bits<7> ssamp;
@@ -344,4 +344,6 @@ class MIMG <dag outs, dag ins, string asm, list<dag> pattern> :
let UseNamedOperandTable = 1;
let hasSideEffects = 0; // XXX ????
+
+ bit HasD16 = 0;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e83f00d047f..a9038cfe6ae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -445,14 +445,6 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::Gather4;
}
- static bool isD16(const MachineInstr &MI) {
- return MI.getDesc().TSFlags & SIInstrFlags::D16;
- }
-
- bool isD16(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::D16;
- }
-
static bool isFLAT(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f5ca0889127..d1957bc2671 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -300,16 +300,6 @@ def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O"
def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;
-class SDSample<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
- SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
->;
-
-def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
-def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
-def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
-def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
-
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
@@ -2079,6 +2069,14 @@ def getMIMGAtomicOp2 : InstrMapping {
let ValueCols = [["1"]];
}
+def getMIMGGatherOpPackedD16 : InstrMapping {
+ let FilterClass = "MIMG_Gather_Size";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["4"];
+ let ValueCols = [["2"]];
+}
+
// Maps an commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2abc18dc0ce..3b2d5c2bf09 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -166,6 +166,9 @@ int getMaskedMIMGAtomicOp(const MCInstrInfo &MII,
unsigned Opc, unsigned NewChannels);
LLVM_READONLY
+int getMIMGGatherOpPackedD16(uint16_t Opcode);
+
+LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
index 1a95d1037dd..b1d3d7349f3 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
- dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+ dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
- dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+ dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.mir b/llvm/test/CodeGen/AMDGPU/memory_clause.mir
index 4a40a3324db..cab91c555fe 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.mir
@@ -305,11 +305,11 @@ body: |
# GCN-LABEL: {{^}}name: image_clause{{$}}
# GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec {
-# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: }
-# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: image_clause
@@ -325,17 +325,17 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
- IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
- IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
# GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
-# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: }
@@ -355,7 +355,7 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
- %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
%5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
...
diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s
index 176c4ef60fc..db7176aaf9e 100644
--- a/llvm/test/MC/AMDGPU/mimg.s
+++ b/llvm/test/MC/AMDGPU/mimg.s
@@ -356,20 +356,19 @@ image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8
// GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00]
image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: d16 modifier is not supported on this GPU
// GFX8_0: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// NOGFX8_1: error: instruction not supported on this GPU
-// NOGFX9: error: instruction not supported on this GPU
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe
image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16
// NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
+// NOGFX8_0: error: image data size does not match dmask and tfe
// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// FIXME: d16 is handled as an optional modifier, should it be corrected?
image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
-// NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
-// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
+// NOSICI: error: image data size does not match dmask and tfe
+// NOGFX8_0: error: image data size does not match dmask and tfe
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe
OpenPOWER on IntegriCloud