diff options
field | value | date |
---|---|---|
author | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2018-01-26 15:43:29 +0000 |
committer | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2018-01-26 15:43:29 +0000 |
commit | 0b4eb1ead18e437b3f630bf3f4c082adab417170 (patch) | |
tree | 75d09dbff6eba1d7c8c155bf8bf8e0f87a48913a /llvm/lib | |
parent | 041ef2dd15dc1606e338dafc48a1f263ec2fa61f (diff) | |
download | bcm5719-llvm-0b4eb1ead18e437b3f630bf3f4c082adab417170.tar.gz bcm5719-llvm-0b4eb1ead18e437b3f630bf3f4c082adab417170.zip | |
[AMDGPU][MC] Added support of 64-bit image atomics
See bug 35998: https://bugs.llvm.org/show_bug.cgi?id=35998
Differential Revision: https://reviews.llvm.org/D42469
Reviewers: vpykhtin, artem.tamazov, arsenm
llvm-svn: 323534
Diffstat (limited to 'llvm/lib')

mode | file | lines changed |
---|---|---|
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 35 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 54 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 16 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 22 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 |

5 files changed, 115 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 6ea9367f270..3697d5aec64 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -265,11 +265,20 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { } DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdst); + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); + + assert(VDataIdx != -1); + assert(DMaskIdx != -1); + + bool isAtomic = (VDstIdx != -1); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; if (DMask == 0) return MCDisassembler::Success; @@ -278,12 +287,26 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (ChannelCount == 1) return MCDisassembler::Success; - int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); - assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + int NewOpcode = -1; + + if (isAtomic) { + if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) { + NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), ChannelCount); + } + if (NewOpcode == -1) return MCDisassembler::Success; + } else { + NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + } + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; - // Widen the register to the correct number of enabled channels. + // Get first subregister of VData unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); + Vdata0 = (VdataSub0 != 0)? 
VdataSub0 : Vdata0; + + // Widen the register to the correct number of enabled channels. auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &MRI.getRegClass(RCID)); if (NewVdata == AMDGPU::NoRegister) { @@ -297,6 +320,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { // how it is usually emitted because the number of register components is not // in the instruction encoding. MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + + if (isAtomic) { + // Atomic operations have an additional operand (a copy of data) + MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); + } + return MCDisassembler::Success; } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index c49691c4342..d31d33ab4de 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -12,6 +12,11 @@ class MIMG_Mask <string op, int channels> { int Channels = channels; } +class MIMG_Atomic_Size <string op, bit is32Bit> { + string Op = op; + int AtomicSize = !if(is32Bit, 1, 2); +} + class mimg <bits<7> si, bits<7> vi = si> { field bits<7> SI = si; field bits<7> VI = vi; @@ -173,9 +178,13 @@ class MIMG_Atomic_Real_vi<mimg op, string name, string asm, let DisableDecoder = DisableVIDecoder; } -multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm, +multiclass MIMG_Atomic_Helper_m <mimg op, + string name, + string asm, + string key, RegisterClass data_rc, RegisterClass addr_rc, + bit is32Bit, bit enableDasm = 0> { let isPseudo = 1, isCodeGenOnly = 1 in { def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>, @@ -183,18 +192,35 @@ multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm, } let ssamp = 0 in { - def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc, enableDasm>; + def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc, enableDasm>, + MIMG_Atomic_Size<key # "_si", is32Bit>; - def _vi : MIMG_Atomic_Real_vi<op, name, asm, 
data_rc, addr_rc, enableDasm>; + def _vi : MIMG_Atomic_Real_vi<op, name, asm, data_rc, addr_rc, enableDasm>, + MIMG_Atomic_Size<key # "_vi", is32Bit>; } } -multiclass MIMG_Atomic <mimg op, string asm, RegisterClass data_rc = VGPR_32> { +multiclass MIMG_Atomic_Addr_Helper_m <mimg op, + string name, + string asm, + RegisterClass data_rc, + bit is32Bit, + bit enableDasm = 0> { // _V* variants have different address size, but the size is not encoded. // So only one variant can be disassembled. V1 looks the safest to decode. - defm _V1 : MIMG_Atomic_Helper_m <op, asm # "_V1", asm, data_rc, VGPR_32, 1>; - defm _V2 : MIMG_Atomic_Helper_m <op, asm # "_V2", asm, data_rc, VReg_64>; - defm _V4 : MIMG_Atomic_Helper_m <op, asm # "_V3", asm, data_rc, VReg_128>; + defm _V1 : MIMG_Atomic_Helper_m <op, name # "_V1", asm, asm # "_V1", data_rc, VGPR_32, is32Bit, enableDasm>; + defm _V2 : MIMG_Atomic_Helper_m <op, name # "_V2", asm, asm # "_V2", data_rc, VReg_64, is32Bit>; + defm _V4 : MIMG_Atomic_Helper_m <op, name # "_V3", asm, asm # "_V3", data_rc, VReg_128, is32Bit>; +} + +multiclass MIMG_Atomic <mimg op, string asm, + RegisterClass data_rc_32 = VGPR_32, // 32-bit atomics + RegisterClass data_rc_64 = VReg_64> { // 64-bit atomics + // _V* variants have different dst size, but the size is encoded implicitly, + // using dmask and tfe. Only 32-bit variant is registered with disassembler. + // Other variants are reconstructed by disassembler using dmask and tfe. 
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm # "_V1", asm, data_rc_32, 1, 1>; + defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm # "_V2", asm, data_rc_64, 0>; } class MIMG_Sampler_Helper <bits<7> op, string asm, @@ -344,7 +370,7 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">; } defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">; -defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64>; +defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64, VReg_128>; defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimg<0x11, 0x12>, "image_atomic_add">; defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimg<0x12, 0x13>, "image_atomic_sub">; //def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI @@ -590,9 +616,9 @@ class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GC // ImageAtomic patterns. multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> { - def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>; - def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>; - def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>; + def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V1), i32>; + def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V2), v2i32>; + def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1_V4), v4i32>; } // ImageAtomicCmpSwap for amdgcn. 
@@ -784,9 +810,9 @@ defm : ImageSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">; // Image atomics defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">; -def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1, i32>; -def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V2, v2i32>; -def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V4, v4i32>; +def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V1, i32>; +def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V2, v2i32>; +def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1_V4, v4i32>; defm : ImageAtomicPatterns<int_amdgcn_image_atomic_add, "IMAGE_ATOMIC_ADD">; defm : ImageAtomicPatterns<int_amdgcn_image_atomic_sub, "IMAGE_ATOMIC_SUB">; defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smin, "IMAGE_ATOMIC_SMIN">; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f4516988b19..993aaa26daf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2040,6 +2040,22 @@ def getMaskedMIMGOp4 : InstrMapping { let ValueCols = [["1"], ["2"], ["3"] ]; } +def getMIMGAtomicOp1 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["1"]; + let ValueCols = [["2"]]; +} + +def getMIMGAtomicOp2 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["2"]; + let ValueCols = [["1"]]; +} + // Maps an commuted opcode to its original version def getCommuteOrig : InstrMapping { let FilterClass = "Commutable_REV"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0deb66b6452..6b4253471fc 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -156,6 +156,28 @@ int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned 
NewChannels) } } +int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) { + assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1); + assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4); + + unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass); + assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4); + + if (NewChannels == OrigChannels) return Opc; + + if (OrigChannels <= 2 && NewChannels <= 2) { + // This is an ordinary atomic (not an atomic_cmpswap) + return (OrigChannels == 1)? + AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc); + } else if (OrigChannels >= 2 && NewChannels >= 2) { + // This is an atomic_cmpswap + return (OrigChannels == 2)? + AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc); + } else { // invalid OrigChannels/NewChannels value + return -1; + } +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 0c1d6976594..7745af1c45c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -159,6 +159,11 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); LLVM_READONLY int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels); + +LLVM_READONLY +int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); + LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); |