summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
authorDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>2018-01-26 15:43:29 +0000
committerDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>2018-01-26 15:43:29 +0000
commit0b4eb1ead18e437b3f630bf3f4c082adab417170 (patch)
tree75d09dbff6eba1d7c8c155bf8bf8e0f87a48913a /llvm/lib
parent041ef2dd15dc1606e338dafc48a1f263ec2fa61f (diff)
downloadbcm5719-llvm-0b4eb1ead18e437b3f630bf3f4c082adab417170.tar.gz
bcm5719-llvm-0b4eb1ead18e437b3f630bf3f4c082adab417170.zip
[AMDGPU][MC] Added support of 64-bit image atomics
See bug 35998: https://bugs.llvm.org/show_bug.cgi?id=35998 Differential Revision: https://reviews.llvm.org/D42469 Reviewers: vpykhtin, artem.tamazov, arsenm llvm-svn: 323534
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td54
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td16
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp22
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h5
5 files changed, 115 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6ea9367f270..3697d5aec64 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -265,11 +265,20 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
}
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+ int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vdst);
+
int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdata);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
+
+ assert(VDataIdx != -1);
+ assert(DMaskIdx != -1);
+
+ bool isAtomic = (VDstIdx != -1);
+
unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
if (DMask == 0)
return MCDisassembler::Success;
@@ -278,12 +287,26 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (ChannelCount == 1)
return MCDisassembler::Success;
- int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount);
- assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
+ int NewOpcode = -1;
+
+ if (isAtomic) {
+ if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) {
+ NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), ChannelCount);
+ }
+ if (NewOpcode == -1) return MCDisassembler::Success;
+ } else {
+ NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount);
+ assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
+ }
+
auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
- // Widen the register to the correct number of enabled channels.
+ // Get first subregister of VData
unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+ unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
+ Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+
+ // Widen the register to the correct number of enabled channels.
auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
&MRI.getRegClass(RCID));
if (NewVdata == AMDGPU::NoRegister) {
@@ -297,6 +320,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
// how it is usually emitted because the number of register components is not
// in the instruction encoding.
MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
+
+ if (isAtomic) {
+ // Atomic operations have an additional operand (a copy of data)
+ MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+ }
+
return MCDisassembler::Success;
}
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index c49691c4342..d31d33ab4de 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -12,6 +12,11 @@ class MIMG_Mask <string op, int channels> {
int Channels = channels;
}
+class MIMG_Atomic_Size <string op, bit is32Bit> {
+ string Op = op;
+ int AtomicSize = !if(is32Bit, 1, 2);
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -173,9 +178,13 @@ class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
let DisableDecoder = DisableVIDecoder;
}
-multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm,
+multiclass MIMG_Atomic_Helper_m <mimg op,
+ string name,
+ string asm,
+ string key,
RegisterClass data_rc,
RegisterClass addr_rc,
+ bit is32Bit,
bit enableDasm = 0> {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
@@ -183,18 +192,35 @@ multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm,
}
let ssamp = 0 in {
- def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc, enableDasm>;
+ def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc, enableDasm>,
+ MIMG_Atomic_Size<key # "_si", is32Bit>;
- def _vi : MIMG_Atomic_Real_vi<op, name, asm, data_rc, addr_rc, enableDasm>;
+ def _vi : MIMG_Atomic_Real_vi<op, name, asm, data_rc, addr_rc, enableDasm>,
+ MIMG_Atomic_Size<key # "_vi", is32Bit>;
}
}
-multiclass MIMG_Atomic <mimg op, string asm, RegisterClass data_rc = VGPR_32> {
+multiclass MIMG_Atomic_Addr_Helper_m <mimg op,
+ string name,
+ string asm,
+ RegisterClass data_rc,
+ bit is32Bit,
+ bit enableDasm = 0> {
// _V* variants have different address size, but the size is not encoded.
// So only one variant can be disassembled. V1 looks the safest to decode.
- defm _V1 : MIMG_Atomic_Helper_m <op, asm # "_V1", asm, data_rc, VGPR_32, 1>;
- defm _V2 : MIMG_Atomic_Helper_m <op, asm # "_V2", asm, data_rc, VReg_64>;
- defm _V4 : MIMG_Atomic_Helper_m <op, asm # "_V3", asm, data_rc, VReg_128>;
+ defm _V1 : MIMG_Atomic_Helper_m <op, name # "_V1", asm, asm # "_V1", data_rc, VGPR_32, is32Bit, enableDasm>;
+ defm _V2 : MIMG_Atomic_Helper_m <op, name # "_V2", asm, asm # "_V2", data_rc, VReg_64, is32Bit>;
+ defm _V4 : MIMG_Atomic_Helper_m <op, name # "_V3", asm, asm # "_V3", data_rc, VReg_128, is32Bit>;
+}
+
+multiclass MIMG_Atomic <mimg op, string asm,
+ RegisterClass data_rc_32 = VGPR_32, // 32-bit atomics
+ RegisterClass data_rc_64 = VReg_64> { // 64-bit atomics
+ // _V* variants have different dst size, but the size is encoded implicitly,
+ // using dmask and tfe. Only 32-bit variant is registered with disassembler.
+ // Other variants are reconstructed by disassembler using dmask and tfe.
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm # "_V1", asm, data_rc_32, 1, 1>;
+ defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm # "_V2", asm, data_rc_64, 0>;
}
class MIMG_Sampler_Helper <bits<7> op, string asm,
@@ -344,7 +370,7 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
}
defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
-defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64>;
+defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64, VReg_128>;
defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimg<0x11, 0x12>, "image_atomic_add">;
defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimg<0x12, 0x13>, "image_atomic_sub">;
//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI
@@ -590,9 +616,9 @@ class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GC
// ImageAtomic patterns.
multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> {
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>;
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>;
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>;
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V1), i32>;
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V2), v2i32>;
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V4), v4i32>;
}
// ImageAtomicCmpSwap for amdgcn.
@@ -784,9 +810,9 @@ defm : ImageSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">;
// Image atomics
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1, i32>;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V2, v2i32>;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V4, v4i32>;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V1, i32>;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V2, v2i32>;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V4, v4i32>;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_add, "IMAGE_ATOMIC_ADD">;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_sub, "IMAGE_ATOMIC_SUB">;
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smin, "IMAGE_ATOMIC_SMIN">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f4516988b19..993aaa26daf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2040,6 +2040,22 @@ def getMaskedMIMGOp4 : InstrMapping {
let ValueCols = [["1"], ["2"], ["3"] ];
}
+def getMIMGAtomicOp1 : InstrMapping {
+ let FilterClass = "MIMG_Atomic_Size";
+ let RowFields = ["Op"];
+ let ColFields = ["AtomicSize"];
+ let KeyCol = ["1"];
+ let ValueCols = [["2"]];
+}
+
+def getMIMGAtomicOp2 : InstrMapping {
+ let FilterClass = "MIMG_Atomic_Size";
+ let RowFields = ["Op"];
+ let ColFields = ["AtomicSize"];
+ let KeyCol = ["2"];
+ let ValueCols = [["1"]];
+}
+
// Maps an commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0deb66b6452..6b4253471fc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -156,6 +156,28 @@ int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels)
}
}
+int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
+ assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1);
+ assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4);
+
+ unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
+ assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4);
+
+ if (NewChannels == OrigChannels) return Opc;
+
+ if (OrigChannels <= 2 && NewChannels <= 2) {
+ // This is an ordinary atomic (not an atomic_cmpswap)
+ return (OrigChannels == 1)?
+ AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
+ } else if (OrigChannels >= 2 && NewChannels >= 2) {
+ // This is an atomic_cmpswap
+ return (OrigChannels == 2)?
+ AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
+ } else { // invalid OrigChannels/NewChannels value
+ return -1;
+ }
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 0c1d6976594..7745af1c45c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -159,6 +159,11 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
LLVM_READONLY
int getMaskedMIMGOp(const MCInstrInfo &MII,
unsigned Opc, unsigned NewChannels);
+
+LLVM_READONLY
+int getMaskedMIMGAtomicOp(const MCInstrInfo &MII,
+ unsigned Opc, unsigned NewChannels);
+
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
OpenPOWER on IntegriCloud