diff options
| author | Changpeng Fang <changpeng.fang@gmail.com> | 2018-01-18 22:08:53 +0000 |
|---|---|---|
| committer | Changpeng Fang <changpeng.fang@gmail.com> | 2018-01-18 22:08:53 +0000 |
| commit | 4737e892def6f22ede1d8145569d4ddf147bdb02 (patch) | |
| tree | 789dcab821d5a5a1914d21b9c64b234c58e9d789 /llvm/lib | |
| parent | 668e6b4b05075ab410f020d96e98986ae98d78da (diff) | |
| download | bcm5719-llvm-4737e892def6f22ede1d8145569d4ddf147bdb02.tar.gz bcm5719-llvm-4737e892def6f22ede1d8145569d4ddf147bdb02.zip | |
AMDGPU/SI: Add d16 support for image intrinsics.
Summary:
This patch implements d16 support for the image load, image store, and image sample intrinsics.
Reviewers:
Matt and Brian.
Differential Revision:
https://reviews.llvm.org/D3991 (revision number appears truncated — verify the full Phabricator ID)
llvm-svn: 322903
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 77 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 85 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 557 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 344 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 128 |
8 files changed, 1009 insertions, 202 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2b0aa301885..0ec2e8ebd34 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4002,6 +4002,83 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) + NODE_NAME_CASE(IMAGE_LOAD) + NODE_NAME_CASE(IMAGE_LOAD_MIP) + NODE_NAME_CASE(IMAGE_STORE) + NODE_NAME_CASE(IMAGE_STORE_MIP) + // Basic sample. + NODE_NAME_CASE(IMAGE_SAMPLE) + NODE_NAME_CASE(IMAGE_SAMPLE_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_D) + NODE_NAME_CASE(IMAGE_SAMPLE_D_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_L) + NODE_NAME_CASE(IMAGE_SAMPLE_B) + NODE_NAME_CASE(IMAGE_SAMPLE_B_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_LZ) + NODE_NAME_CASE(IMAGE_SAMPLE_CD) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL) + // Sample with comparison. + NODE_NAME_CASE(IMAGE_SAMPLE_C) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_L) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL) + // Sample with offsets. + NODE_NAME_CASE(IMAGE_SAMPLE_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_D_O) + NODE_NAME_CASE(IMAGE_SAMPLE_D_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_L_O) + NODE_NAME_CASE(IMAGE_SAMPLE_B_O) + NODE_NAME_CASE(IMAGE_SAMPLE_B_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_LZ_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL_O) + // Sample with comparison and offsets. 
+ NODE_NAME_CASE(IMAGE_SAMPLE_C_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_L_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL_O) + // Basic gather4. + NODE_NAME_CASE(IMAGE_GATHER4) + NODE_NAME_CASE(IMAGE_GATHER4_CL) + NODE_NAME_CASE(IMAGE_GATHER4_L) + NODE_NAME_CASE(IMAGE_GATHER4_B) + NODE_NAME_CASE(IMAGE_GATHER4_B_CL) + NODE_NAME_CASE(IMAGE_GATHER4_LZ) + // Gather4 with comparison. + NODE_NAME_CASE(IMAGE_GATHER4_C) + NODE_NAME_CASE(IMAGE_GATHER4_C_CL) + NODE_NAME_CASE(IMAGE_GATHER4_C_L) + NODE_NAME_CASE(IMAGE_GATHER4_C_B) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL) + NODE_NAME_CASE(IMAGE_GATHER4_C_LZ) + // Gather4 with offsets. + NODE_NAME_CASE(IMAGE_GATHER4_O) + NODE_NAME_CASE(IMAGE_GATHER4_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_L_O) + NODE_NAME_CASE(IMAGE_GATHER4_B_O) + NODE_NAME_CASE(IMAGE_GATHER4_B_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_LZ_O) + // Gather4 with comparison and offsets. + NODE_NAME_CASE(IMAGE_GATHER4_C_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_L_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_LZ_O) + case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 35e4f570b9f..a16402e3c98 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -477,6 +477,91 @@ enum NodeType : unsigned { BUFFER_ATOMIC_OR, BUFFER_ATOMIC_XOR, BUFFER_ATOMIC_CMPSWAP, + IMAGE_LOAD, + IMAGE_LOAD_MIP, + IMAGE_STORE, + IMAGE_STORE_MIP, + + // Basic sample. 
+ IMAGE_SAMPLE, + IMAGE_SAMPLE_CL, + IMAGE_SAMPLE_D, + IMAGE_SAMPLE_D_CL, + IMAGE_SAMPLE_L, + IMAGE_SAMPLE_B, + IMAGE_SAMPLE_B_CL, + IMAGE_SAMPLE_LZ, + IMAGE_SAMPLE_CD, + IMAGE_SAMPLE_CD_CL, + + // Sample with comparison. + IMAGE_SAMPLE_C, + IMAGE_SAMPLE_C_CL, + IMAGE_SAMPLE_C_D, + IMAGE_SAMPLE_C_D_CL, + IMAGE_SAMPLE_C_L, + IMAGE_SAMPLE_C_B, + IMAGE_SAMPLE_C_B_CL, + IMAGE_SAMPLE_C_LZ, + IMAGE_SAMPLE_C_CD, + IMAGE_SAMPLE_C_CD_CL, + + // Sample with offsets. + IMAGE_SAMPLE_O, + IMAGE_SAMPLE_CL_O, + IMAGE_SAMPLE_D_O, + IMAGE_SAMPLE_D_CL_O, + IMAGE_SAMPLE_L_O, + IMAGE_SAMPLE_B_O, + IMAGE_SAMPLE_B_CL_O, + IMAGE_SAMPLE_LZ_O, + IMAGE_SAMPLE_CD_O, + IMAGE_SAMPLE_CD_CL_O, + + // Sample with comparison and offsets. + IMAGE_SAMPLE_C_O, + IMAGE_SAMPLE_C_CL_O, + IMAGE_SAMPLE_C_D_O, + IMAGE_SAMPLE_C_D_CL_O, + IMAGE_SAMPLE_C_L_O, + IMAGE_SAMPLE_C_B_O, + IMAGE_SAMPLE_C_B_CL_O, + IMAGE_SAMPLE_C_LZ_O, + IMAGE_SAMPLE_C_CD_O, + IMAGE_SAMPLE_C_CD_CL_O, + + // Basic gather4. + IMAGE_GATHER4, + IMAGE_GATHER4_CL, + IMAGE_GATHER4_L, + IMAGE_GATHER4_B, + IMAGE_GATHER4_B_CL, + IMAGE_GATHER4_LZ, + + // Gather4 with comparison. + IMAGE_GATHER4_C, + IMAGE_GATHER4_C_CL, + IMAGE_GATHER4_C_L, + IMAGE_GATHER4_C_B, + IMAGE_GATHER4_C_B_CL, + IMAGE_GATHER4_C_LZ, + + // Gather4 with offsets. + IMAGE_GATHER4_O, + IMAGE_GATHER4_CL_O, + IMAGE_GATHER4_L_O, + IMAGE_GATHER4_B_O, + IMAGE_GATHER4_B_CL_O, + IMAGE_GATHER4_LZ_O, + + // Gather4 with comparison and offsets. 
+ IMAGE_GATHER4_C_O, + IMAGE_GATHER4_C_CL_O, + IMAGE_GATHER4_C_L_O, + IMAGE_GATHER4_C_B_O, + IMAGE_GATHER4_C_B_CL_O, + IMAGE_GATHER4_C_LZ_O, + LAST_AMDGPU_ISD_NUMBER }; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 651265fc54d..9fd0abd9a3d 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -32,26 +32,45 @@ class MIMG_Helper <dag outs, dag ins, string asm, class MIMG_NoSampler_Helper <bits<7> op, string asm, RegisterClass dst_rc, RegisterClass addr_rc, + bit d16_bit=0, string dns=""> : MIMG_Helper < (outs dst_rc:$vdata), (ins addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> { let ssamp = 0; + let D16 = d16; +} + +multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + int channels, bit d16_bit, + string suffix> { + def _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit, + !if(!eq(channels, 1), "AMDGPU", "")>, + MIMG_Mask<asm#"_V1"#suffix, channels>; + def _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>, + MIMG_Mask<asm#"_V2"#suffix, channels>; + def _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>, + MIMG_Mask<asm#"_V4"#suffix, channels>; } multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc, int channels> { - def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, - !if(!eq(channels, 1), "AMDGPU", "")>, - MIMG_Mask<asm#"_V1", channels>; - def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>, - MIMG_Mask<asm#"_V2", channels>; - def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>, - MIMG_Mask<asm#"_V4", channels>; + defm : 
MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + } // End d16 = 1. } multiclass MIMG_NoSampler <bits<7> op, string asm> { @@ -64,30 +83,49 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> { class MIMG_Store_Helper <bits<7> op, string asm, RegisterClass data_rc, RegisterClass addr_rc, + bit d16_bit=0, string dns = ""> : MIMG_Helper < (outs), (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> { + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> { let ssamp = 0; let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; let hasPostISelHook = 0; let DisableWQM = 1; + let D16 = d16; +} + +multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm, + RegisterClass data_rc, + int channels, bit d16_bit, + string suffix> { + def _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit, + !if(!eq(channels, 1), "AMDGPU", "")>, + MIMG_Mask<asm#"_V1"#suffix, channels>; + def _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>, + MIMG_Mask<asm#"_V2"#suffix, channels>; + def _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>, + MIMG_Mask<asm#"_V4"#suffix, channels>; } multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm, RegisterClass data_rc, int channels> { - def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, - !if(!eq(channels, 1), "AMDGPU", "")>, - MIMG_Mask<asm#"_V1", 
channels>; - def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>, - MIMG_Mask<asm#"_V2", channels>; - def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>, - MIMG_Mask<asm#"_V4", channels>; + defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + } // End d16 = 1. } multiclass MIMG_Store <bits<7> op, string asm> { @@ -159,30 +197,49 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc, RegisterClass src_rc, bit wqm, + bit d16_bit=0, string dns=""> : MIMG_Helper < (outs dst_rc:$vdata), (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> { let WQM = wqm; + let D16 = d16; +} + +multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + int channels, bit wqm, + bit d16_bit, string suffix> { + def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit, + !if(!eq(channels, 1), "AMDGPU", "")>, + MIMG_Mask<asm#"_V1"#suffix, channels>; + def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>, + MIMG_Mask<asm#"_V2"#suffix, channels>; + def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>, + MIMG_Mask<asm#"_V4"#suffix, channels>; + def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>, + MIMG_Mask<asm#"_V8"#suffix, channels>; + def 
_V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>, + MIMG_Mask<asm#"_V16"#suffix, channels>; } multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc, int channels, bit wqm> { - def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, - !if(!eq(channels, 1), "AMDGPU", "")>, - MIMG_Mask<asm#"_V1", channels>; - def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>, - MIMG_Mask<asm#"_V2", channels>; - def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>, - MIMG_Mask<asm#"_V4", channels>; - def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>, - MIMG_Mask<asm#"_V8", channels>; - def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>, - MIMG_Mask<asm#"_V16", channels>; + defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + } // End d16 = 1. 
} multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> { @@ -196,12 +253,12 @@ multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>; class MIMG_Gather_Helper <bits<7> op, string asm, RegisterClass dst_rc, - RegisterClass src_rc, bit wqm> : MIMG < + RegisterClass src_rc, bit wqm, bit d16_bit=0> : MIMG < (outs dst_rc:$vdata), (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), []>, MIMGe<op> { let mayLoad = 1; let mayStore = 0; @@ -216,23 +273,42 @@ class MIMG_Gather_Helper <bits<7> op, string asm, let Gather4 = 1; let hasPostISelHook = 0; let WQM = wqm; + let D16 = d16; let isAsmParserOnly = 1; // TBD: fix it later } + +multiclass MIMG_Gather_Src_Helper_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + int channels, bit wqm, + bit d16_bit, string suffix> { + def _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>, + MIMG_Mask<asm#"_V1"#suffix, channels>; + def _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>, + MIMG_Mask<asm#"_V2"#suffix, channels>; + def _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>, + MIMG_Mask<asm#"_V4"#suffix, channels>; + def _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>, + MIMG_Mask<asm#"_V8"#suffix, channels>; + def _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>, + MIMG_Mask<asm#"_V16"#suffix, channels>; +} + multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc, int channels, bit wqm> { - def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>, - MIMG_Mask<asm#"_V1", channels>; - def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>, - MIMG_Mask<asm#"_V2", 
channels>; - def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>, - MIMG_Mask<asm#"_V4", channels>; - def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>, - MIMG_Mask<asm#"_V8", channels>; - def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>, - MIMG_Mask<asm#"_V16", channels>; + defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 0, "">; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 1, "_D16">; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + } // End d16 = 1. } multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> { @@ -357,29 +433,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" /********** Image sampling patterns **********/ /********** ======================= **********/ -// Image + sampler -class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat < - (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, - i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), - (opcode $addr, $rsrc, $sampler, - (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da)) ->; - -multiclass SampleRawPatterns<SDPatternOperator name, string opcode> { - def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; - def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; - def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>; - def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>; - def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>; -} - -// Image + sampler for amdgcn +// ImageSample for amdgcn // TODO: -// 1. 
Handle half data type like v4f16, and add D16 bit support; -// 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). -// 3. Add A16 support when we pass address of half type. -multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> { +// 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). +// 2. Add A16 support when we pass address of half type. +multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> { def : GCNPat< (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), @@ -389,36 +447,44 @@ multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt >; } -multiclass AMDGCNSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt> { - defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32>; - defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32>; - defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32>; - defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32>; - defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32>; +multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> { + defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>; + defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>; + defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>; + defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>; + defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>; } -// TODO: support v3f32. 
-multiclass AMDGCNSamplePatterns<SDPatternOperator name, string opcode> { - defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>; - defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>; - defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>; +// ImageSample patterns. +multiclass ImageSamplePatterns<SDPatternOperator name, string opcode> { + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>; + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>; + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">; + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. } -// Image only -class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat < - (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm, - imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe), - (opcode $addr, $rsrc, - (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da)) ->; +// ImageSample alternative patterns for illegal vector half Types. +multiclass ImageSampleAltPatterns<SDPatternOperator name, string opcode> { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnpackedD16VMem. 
-multiclass ImagePatterns<SDPatternOperator name, string opcode> { - def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>; - def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>; - def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>; + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">; + defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. } +// ImageLoad for amdgcn. multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> { def : GCNPat < (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, @@ -429,19 +495,43 @@ multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, V >; } -multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt> { - defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32>; - defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>; - defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>; +multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> { + defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>; + defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>; + defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>; } +// ImageLoad patterns. // TODO: support v3f32. multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> { defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>; defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>; defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. 
+ + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">; + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. } +// ImageLoad alternative patterns for illegal vector half Types. +multiclass ImageLoadAltPatterns<SDPatternOperator name, string opcode> { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnPackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">; + defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. +} + +// ImageStore for amdgcn. multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> { def : GCNPat < (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, @@ -452,30 +542,56 @@ multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, >; } -multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt> { - defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32>; - defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>; - defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>; +multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> { + defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>; + defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>; + defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>; } +// ImageStore patterns. // TODO: support v3f32. 
multiclass ImageStorePatterns<SDPatternOperator name, string opcode> { defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>; defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>; defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">; + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. } +// ImageStore alternative patterns. +multiclass ImageStoreAltPatterns<SDPatternOperator name, string opcode> { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">; + defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. +} + +// ImageAtomic for amdgcn. class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat < (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)) >; +// ImageAtomic patterns. multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> { def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>; def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>; def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>; } +// ImageAtomicCmpSwap for amdgcn. 
class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : GCNPat < (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), @@ -487,93 +603,180 @@ class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : GCNPat < // ======= amdgcn Image Intrinsics ============== -// Image load +// Image load. defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">; defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">; defm : ImageLoadPatterns<int_amdgcn_image_getresinfo, "IMAGE_GET_RESINFO">; - -// Image store -defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">; -defm : ImageStorePatterns<int_amdgcn_image_store_mip, "IMAGE_STORE_MIP">; - -// Basic sample -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample, "IMAGE_SAMPLE">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl, "IMAGE_SAMPLE_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d, "IMAGE_SAMPLE_D">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l, "IMAGE_SAMPLE_L">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b, "IMAGE_SAMPLE_B">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz, "IMAGE_SAMPLE_LZ">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd, "IMAGE_SAMPLE_CD">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">; - -// Sample with comparison -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c, "IMAGE_SAMPLE_C">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d, "IMAGE_SAMPLE_C_D">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l, "IMAGE_SAMPLE_C_L">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b, 
"IMAGE_SAMPLE_C_B">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">; - -// Sample with offsets -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_o, "IMAGE_SAMPLE_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_o, "IMAGE_SAMPLE_D_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l_o, "IMAGE_SAMPLE_L_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_o, "IMAGE_SAMPLE_B_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">; - -// Sample with comparison and offsets -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_o, "IMAGE_SAMPLE_C_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">; 
-defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">; - -// Gather opcodes -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">; - -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">; - -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">; - -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">; -defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">; 
-defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">; - -defm : AMDGCNSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">; +defm : ImageLoadAltPatterns<SIImage_load, "IMAGE_LOAD">; +defm : ImageLoadAltPatterns<SIImage_load_mip, "IMAGE_LOAD_MIP">; + +// Image store. +defm : ImageStorePatterns<SIImage_store, "IMAGE_STORE">; +defm : ImageStorePatterns<SIImage_store_mip, "IMAGE_STORE_MIP">; +defm : ImageStoreAltPatterns<SIImage_store, "IMAGE_STORE">; +defm : ImageStoreAltPatterns<SIImage_store_mip, "IMAGE_STORE_MIP">; + +// Basic sample. +defm : ImageSamplePatterns<int_amdgcn_image_sample, "IMAGE_SAMPLE">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cl, "IMAGE_SAMPLE_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_d, "IMAGE_SAMPLE_D">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_l, "IMAGE_SAMPLE_L">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_b, "IMAGE_SAMPLE_B">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_lz, "IMAGE_SAMPLE_LZ">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cd, "IMAGE_SAMPLE_CD">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">; + +// Sample with comparison. 
+defm : ImageSamplePatterns<int_amdgcn_image_sample_c, "IMAGE_SAMPLE_C">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d, "IMAGE_SAMPLE_C_D">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_l, "IMAGE_SAMPLE_C_L">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b, "IMAGE_SAMPLE_C_B">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">; + +// Sample with offsets. +defm : ImageSamplePatterns<int_amdgcn_image_sample_o, "IMAGE_SAMPLE_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_d_o, "IMAGE_SAMPLE_D_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_l_o, "IMAGE_SAMPLE_L_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_b_o, "IMAGE_SAMPLE_B_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">; + +// Sample with comparison and offsets. 
+defm : ImageSamplePatterns<int_amdgcn_image_sample_c_o, "IMAGE_SAMPLE_C_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">; +defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">; + +// Basic gather4. +defm : ImageSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">; + +// Gather4 with comparison. +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">; + +// Gather4 with offsets. 
+defm : ImageSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">; + +// Gather4 with comparison and offsets. +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">; +defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">; + +// Basic sample alternative. +defm : ImageSampleAltPatterns<SIImage_sample, "IMAGE_SAMPLE">; +defm : ImageSampleAltPatterns<SIImage_sample_cl, "IMAGE_SAMPLE_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_d, "IMAGE_SAMPLE_D">; +defm : ImageSampleAltPatterns<SIImage_sample_d_cl, "IMAGE_SAMPLE_D_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_l, "IMAGE_SAMPLE_L">; +defm : ImageSampleAltPatterns<SIImage_sample_b, "IMAGE_SAMPLE_B">; +defm : ImageSampleAltPatterns<SIImage_sample_b_cl, "IMAGE_SAMPLE_B_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_lz, "IMAGE_SAMPLE_LZ">; +defm : ImageSampleAltPatterns<SIImage_sample_cd, "IMAGE_SAMPLE_CD">; +defm : ImageSampleAltPatterns<SIImage_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">; + +// Sample with comparison alternative. 
+defm : ImageSampleAltPatterns<SIImage_sample_c, "IMAGE_SAMPLE_C">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cl, "IMAGE_SAMPLE_C_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_c_d, "IMAGE_SAMPLE_C_D">; +defm : ImageSampleAltPatterns<SIImage_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_c_l, "IMAGE_SAMPLE_C_L">; +defm : ImageSampleAltPatterns<SIImage_sample_c_b, "IMAGE_SAMPLE_C_B">; +defm : ImageSampleAltPatterns<SIImage_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">; +defm : ImageSampleAltPatterns<SIImage_sample_c_lz, "IMAGE_SAMPLE_C_LZ">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cd, "IMAGE_SAMPLE_C_CD">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">; + +// Sample with offsets alternative. +defm : ImageSampleAltPatterns<SIImage_sample_o, "IMAGE_SAMPLE_O">; +defm : ImageSampleAltPatterns<SIImage_sample_cl_o, "IMAGE_SAMPLE_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_d_o, "IMAGE_SAMPLE_D_O">; +defm : ImageSampleAltPatterns<SIImage_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_l_o, "IMAGE_SAMPLE_L_O">; +defm : ImageSampleAltPatterns<SIImage_sample_b_o, "IMAGE_SAMPLE_B_O">; +defm : ImageSampleAltPatterns<SIImage_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_lz_o, "IMAGE_SAMPLE_LZ_O">; +defm : ImageSampleAltPatterns<SIImage_sample_cd_o, "IMAGE_SAMPLE_CD_O">; +defm : ImageSampleAltPatterns<SIImage_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">; + +// Sample with comparison and offsets alternative. 
+defm : ImageSampleAltPatterns<SIImage_sample_c_o, "IMAGE_SAMPLE_C_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">; +defm : ImageSampleAltPatterns<SIImage_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">; + +// Basic gather4 alternative. +defm : ImageSampleAltPatterns<SIImage_gather4, "IMAGE_GATHER4">; +defm : ImageSampleAltPatterns<SIImage_gather4_cl, "IMAGE_GATHER4_CL">; +defm : ImageSampleAltPatterns<SIImage_gather4_l, "IMAGE_GATHER4_L">; +defm : ImageSampleAltPatterns<SIImage_gather4_b, "IMAGE_GATHER4_B">; +defm : ImageSampleAltPatterns<SIImage_gather4_b_cl, "IMAGE_GATHER4_B_CL">; +defm : ImageSampleAltPatterns<SIImage_gather4_lz, "IMAGE_GATHER4_LZ">; + +// Gather4 with comparison alternative. +defm : ImageSampleAltPatterns<SIImage_gather4_c, "IMAGE_GATHER4_C">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_cl, "IMAGE_GATHER4_C_CL">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_l, "IMAGE_GATHER4_C_L">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_b, "IMAGE_GATHER4_C_B">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_lz, "IMAGE_GATHER4_C_LZ">; + +// Gather4 with offsets alternative. 
+defm : ImageSampleAltPatterns<SIImage_gather4_o, "IMAGE_GATHER4_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_cl_o, "IMAGE_GATHER4_CL_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_l_o, "IMAGE_GATHER4_L_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_b_o, "IMAGE_GATHER4_B_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_lz_o, "IMAGE_GATHER4_LZ_O">; + +// Gather4 with comparison and offsets alternative. +defm : ImageSampleAltPatterns<SIImage_gather4_c_o, "IMAGE_GATHER4_C_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">; +defm : ImageSampleAltPatterns<SIImage_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">; + +defm : ImageSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">; // Image atomics defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index bf8d88bf4fa..1b93c2f5248 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -85,7 +85,10 @@ enum : uint64_t { ClampHi = UINT64_C(1) << 48, // Is a packed VOP3P instruction. - IsPacked = UINT64_C(1) << 49 + IsPacked = UINT64_C(1) << 49, + + // "d16" bit set or not. + D16 = UINT64_C(1) << 50 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5cb22767e68..7dc9dcf31fc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3510,6 +3510,163 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +static unsigned getImageOpcode(unsigned IID) { + switch (IID) { + case Intrinsic::amdgcn_image_load: + return AMDGPUISD::IMAGE_LOAD; + case Intrinsic::amdgcn_image_load_mip: + return AMDGPUISD::IMAGE_LOAD_MIP; + + // Basic sample. + case Intrinsic::amdgcn_image_sample: + return AMDGPUISD::IMAGE_SAMPLE; + case Intrinsic::amdgcn_image_sample_cl: + return AMDGPUISD::IMAGE_SAMPLE_CL; + case Intrinsic::amdgcn_image_sample_d: + return AMDGPUISD::IMAGE_SAMPLE_D; + case Intrinsic::amdgcn_image_sample_d_cl: + return AMDGPUISD::IMAGE_SAMPLE_D_CL; + case Intrinsic::amdgcn_image_sample_l: + return AMDGPUISD::IMAGE_SAMPLE_L; + case Intrinsic::amdgcn_image_sample_b: + return AMDGPUISD::IMAGE_SAMPLE_B; + case Intrinsic::amdgcn_image_sample_b_cl: + return AMDGPUISD::IMAGE_SAMPLE_B_CL; + case Intrinsic::amdgcn_image_sample_lz: + return AMDGPUISD::IMAGE_SAMPLE_LZ; + case Intrinsic::amdgcn_image_sample_cd: + return AMDGPUISD::IMAGE_SAMPLE_CD; + case Intrinsic::amdgcn_image_sample_cd_cl: + return AMDGPUISD::IMAGE_SAMPLE_CD_CL; + + // Sample with comparison. 
+ case Intrinsic::amdgcn_image_sample_c: + return AMDGPUISD::IMAGE_SAMPLE_C; + case Intrinsic::amdgcn_image_sample_c_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_CL; + case Intrinsic::amdgcn_image_sample_c_d: + return AMDGPUISD::IMAGE_SAMPLE_C_D; + case Intrinsic::amdgcn_image_sample_c_d_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_D_CL; + case Intrinsic::amdgcn_image_sample_c_l: + return AMDGPUISD::IMAGE_SAMPLE_C_L; + case Intrinsic::amdgcn_image_sample_c_b: + return AMDGPUISD::IMAGE_SAMPLE_C_B; + case Intrinsic::amdgcn_image_sample_c_b_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_B_CL; + case Intrinsic::amdgcn_image_sample_c_lz: + return AMDGPUISD::IMAGE_SAMPLE_C_LZ; + case Intrinsic::amdgcn_image_sample_c_cd: + return AMDGPUISD::IMAGE_SAMPLE_C_CD; + case Intrinsic::amdgcn_image_sample_c_cd_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL; + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + return AMDGPUISD::IMAGE_SAMPLE_O; + case Intrinsic::amdgcn_image_sample_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_CL_O; + case Intrinsic::amdgcn_image_sample_d_o: + return AMDGPUISD::IMAGE_SAMPLE_D_O; + case Intrinsic::amdgcn_image_sample_d_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_D_CL_O; + case Intrinsic::amdgcn_image_sample_l_o: + return AMDGPUISD::IMAGE_SAMPLE_L_O; + case Intrinsic::amdgcn_image_sample_b_o: + return AMDGPUISD::IMAGE_SAMPLE_B_O; + case Intrinsic::amdgcn_image_sample_b_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_B_CL_O; + case Intrinsic::amdgcn_image_sample_lz_o: + return AMDGPUISD::IMAGE_SAMPLE_LZ_O; + case Intrinsic::amdgcn_image_sample_cd_o: + return AMDGPUISD::IMAGE_SAMPLE_CD_O; + case Intrinsic::amdgcn_image_sample_cd_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_CD_CL_O; + + // Sample with comparison and offsets. 
+ case Intrinsic::amdgcn_image_sample_c_o: + return AMDGPUISD::IMAGE_SAMPLE_C_O; + case Intrinsic::amdgcn_image_sample_c_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CL_O; + case Intrinsic::amdgcn_image_sample_c_d_o: + return AMDGPUISD::IMAGE_SAMPLE_C_D_O; + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O; + case Intrinsic::amdgcn_image_sample_c_l_o: + return AMDGPUISD::IMAGE_SAMPLE_C_L_O; + case Intrinsic::amdgcn_image_sample_c_b_o: + return AMDGPUISD::IMAGE_SAMPLE_C_B_O; + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O; + case Intrinsic::amdgcn_image_sample_c_lz_o: + return AMDGPUISD::IMAGE_SAMPLE_C_LZ_O; + case Intrinsic::amdgcn_image_sample_c_cd_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_O; + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O; + + // Basic gather4. + case Intrinsic::amdgcn_image_gather4: + return AMDGPUISD::IMAGE_GATHER4; + case Intrinsic::amdgcn_image_gather4_cl: + return AMDGPUISD::IMAGE_GATHER4_CL; + case Intrinsic::amdgcn_image_gather4_l: + return AMDGPUISD::IMAGE_GATHER4_L; + case Intrinsic::amdgcn_image_gather4_b: + return AMDGPUISD::IMAGE_GATHER4_B; + case Intrinsic::amdgcn_image_gather4_b_cl: + return AMDGPUISD::IMAGE_GATHER4_B_CL; + case Intrinsic::amdgcn_image_gather4_lz: + return AMDGPUISD::IMAGE_GATHER4_LZ; + + // Gather4 with comparison. + case Intrinsic::amdgcn_image_gather4_c: + return AMDGPUISD::IMAGE_GATHER4_C; + case Intrinsic::amdgcn_image_gather4_c_cl: + return AMDGPUISD::IMAGE_GATHER4_C_CL; + case Intrinsic::amdgcn_image_gather4_c_l: + return AMDGPUISD::IMAGE_GATHER4_C_L; + case Intrinsic::amdgcn_image_gather4_c_b: + return AMDGPUISD::IMAGE_GATHER4_C_B; + case Intrinsic::amdgcn_image_gather4_c_b_cl: + return AMDGPUISD::IMAGE_GATHER4_C_B_CL; + case Intrinsic::amdgcn_image_gather4_c_lz: + return AMDGPUISD::IMAGE_GATHER4_C_LZ; + + // Gather4 with offsets. 
+ case Intrinsic::amdgcn_image_gather4_o: + return AMDGPUISD::IMAGE_GATHER4_O; + case Intrinsic::amdgcn_image_gather4_cl_o: + return AMDGPUISD::IMAGE_GATHER4_CL_O; + case Intrinsic::amdgcn_image_gather4_l_o: + return AMDGPUISD::IMAGE_GATHER4_L_O; + case Intrinsic::amdgcn_image_gather4_b_o: + return AMDGPUISD::IMAGE_GATHER4_B_O; + case Intrinsic::amdgcn_image_gather4_b_cl_o: + return AMDGPUISD::IMAGE_GATHER4_B_CL_O; + case Intrinsic::amdgcn_image_gather4_lz_o: + return AMDGPUISD::IMAGE_GATHER4_LZ_O; + + // Gather4 with comparison and offsets. + case Intrinsic::amdgcn_image_gather4_c_o: + return AMDGPUISD::IMAGE_GATHER4_C_O; + case Intrinsic::amdgcn_image_gather4_c_cl_o: + return AMDGPUISD::IMAGE_GATHER4_C_CL_O; + case Intrinsic::amdgcn_image_gather4_c_l_o: + return AMDGPUISD::IMAGE_GATHER4_C_L_O; + case Intrinsic::amdgcn_image_gather4_c_b_o: + return AMDGPUISD::IMAGE_GATHER4_C_B_O; + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + return AMDGPUISD::IMAGE_GATHER4_C_B_CL_O; + case Intrinsic::amdgcn_image_gather4_c_lz_o: + return AMDGPUISD::IMAGE_GATHER4_C_LZ_O; + + default: + break; + } + return 0; +} + static SDValue adjustLoadValueType(SDValue Result, EVT LoadVT, SDLoc DL, SelectionDAG &DAG, bool Unpacked) { if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16. 
@@ -3545,16 +3702,16 @@ SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op, switch (IID) { case Intrinsic::amdgcn_tbuffer_load: { SDValue Ops[] = { - Op.getOperand(0), // Chain - Op.getOperand(2), // rsrc - Op.getOperand(3), // vindex - Op.getOperand(4), // voffset - Op.getOperand(5), // soffset - Op.getOperand(6), // offset - Op.getOperand(7), // dfmt - Op.getOperand(8), // nfmt - Op.getOperand(9), // glc - Op.getOperand(10) // slc + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // voffset + Op.getOperand(5), // soffset + Op.getOperand(6), // offset + Op.getOperand(7), // dfmt + Op.getOperand(8), // nfmt + Op.getOperand(9), // glc + Op.getOperand(10) // slc }; Res = DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, DL, VTList, Ops, M->getMemoryVT(), @@ -3563,19 +3720,134 @@ SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op, return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); } case Intrinsic::amdgcn_buffer_load_format: { - SDValue Ops[] = { - Op.getOperand(0), // Chain - Op.getOperand(2), // rsrc - Op.getOperand(3), // vindex - Op.getOperand(4), // offset - Op.getOperand(5), // glc - Op.getOperand(6) // slc - }; - Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - DL, VTList, Ops, M->getMemoryVT(), - M->getMemOperand()); - Chain = Res.getValue(1); - return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // offset + Op.getOperand(5), // glc + Op.getOperand(6) // slc + }; + Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, + DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: { + SDValue Ops[] = { + 
Op.getOperand(0), // Chain + Op.getOperand(2), // vaddr + Op.getOperand(3), // rsrc + Op.getOperand(4), // dmask + Op.getOperand(5), // glc + Op.getOperand(6), // slc + Op.getOperand(7), // lwe + Op.getOperand(8) // da + }; + unsigned Opc = getImageOpcode(IID); + Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + // Basic sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. 
+ case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // vaddr + Op.getOperand(3), // rsrc + Op.getOperand(4), // sampler + Op.getOperand(5), // dmask + Op.getOperand(6), // unorm + Op.getOperand(7), // glc + Op.getOperand(8), // slc + Op.getOperand(9), // lwe + Op.getOperand(10) // da + }; + unsigned Opc = getImageOpcode(IID); 
+ Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); } default: return SDValue(); @@ -4982,6 +5254,30 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SDValue VData = Op.getOperand(2); + bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + if (IsD16) + VData = handleD16VData(VData, DAG); + SDValue Ops[] = { + Chain, // Chain + VData, // vdata + Op.getOperand(3), // vaddr + Op.getOperand(4), // rsrc + Op.getOperand(5), // dmask + Op.getOperand(6), // glc + Op.getOperand(7), // slc + Op.getOperand(8), // lwe + Op.getOperand(9) // da + }; + unsigned Opc = (IntrinsicID==Intrinsic::amdgcn_image_store) ? + AMDGPUISD::IMAGE_STORE : AMDGPUISD::IMAGE_STORE_MIP; + MemSDNode *M = cast<MemSDNode>(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, + M->getMemoryVT(), M->getMemOperand()); + } + default: return Op; } @@ -7101,7 +7397,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned Opcode = Node->getMachineOpcode(); if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() && - !TII->isGather4(Opcode)) { + !TII->isGather4(Opcode) && !TII->isD16(Opcode)) { return adjustWritemask(Node, DAG); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 25917cc06e6..af9908b9846 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string asm = "", // This bit indicates that this is a packed VOP3P instruction field bit IsPacked = 0; + // This bit indicates that this is a D16 instruction. + field bit D16 = 0; + // These need to be kept in sync with the enum in SIInstrFlags. 
let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -173,6 +176,8 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{49} = IsPacked; + let TSFlags{50} = D16; + let SchedRW = [Write32Bit]; field bits<1> DisableSIDecoder = 0; @@ -247,6 +252,7 @@ class MIMGe <bits<7> op> : Enc64 { bits<1> tfe; bits<1> lwe; bits<1> slc; + bits<1> d16 = 0; bits<8> vaddr; bits<7> srsrc; bits<7> ssamp; @@ -265,6 +271,7 @@ class MIMGe <bits<7> op> : Enc64 { let Inst{47-40} = vdata; let Inst{52-48} = srsrc{6-2}; let Inst{57-53} = ssamp{6-2}; + let Inst{63} = d16; } class EXPe : Enc64 { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 24ee843e6ad..13f9959c4d8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -456,6 +456,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::Gather4; } + static bool isD16(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::D16; + } + + bool isD16(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::D16; + } + static bool isFLAT(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::FLAT; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index df407217f37..f4516988b19 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -170,6 +170,134 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] >; +def SDTImage_load : SDTypeProfile<1, 7, + [ + SDTCisInt<1>, // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, i32>, // dmask + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>, // slc + SDTCisVT<6, i1>, // lwe + SDTCisVT<7, i1> // da + ]>; +def SIImage_load : SDNode<"AMDGPUISD::IMAGE_LOAD", SDTImage_load, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; +def SIImage_load_mip : SDNode<"AMDGPUISD::IMAGE_LOAD_MIP", SDTImage_load, + [SDNPMayLoad, SDNPMemOperand, 
SDNPHasChain]>; + +def SDTImage_store : SDTypeProfile<0, 8, + [ + SDTCisInt<1>, // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, i32>, // dmask + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>, // slc + SDTCisVT<6, i1>, // lwe + SDTCisVT<7, i1> // da + ]>; +def SIImage_store : SDNode <"AMDGPUISD::IMAGE_STORE", + SDTImage_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SIImage_store_mip : SDNode <"AMDGPUISD::IMAGE_STORE_MIP", + SDTImage_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; + +class SDTImage_sample<string opcode> : SDNode <opcode, + SDTypeProfile<1, 9, + [ + SDTCisFP<1>, // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, v4i32>, // sampler + SDTCisVT<4, i32>, // dmask + SDTCisVT<5, i1>, // unorm + SDTCisVT<6, i1>, // glc + SDTCisVT<7, i1>, // slc + SDTCisVT<8, i1>, // lwe + SDTCisVT<9, i1> // da + ]>, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] +>; + +// Basic sample. +def SIImage_sample : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE">; +def SIImage_sample_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL">; +def SIImage_sample_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D">; +def SIImage_sample_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL">; +def SIImage_sample_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L">; +def SIImage_sample_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B">; +def SIImage_sample_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL">; +def SIImage_sample_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ">; +def SIImage_sample_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD">; +def SIImage_sample_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL">; + +// Sample with comparison. 
+def SIImage_sample_c : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C">; +def SIImage_sample_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL">; +def SIImage_sample_c_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D">; +def SIImage_sample_c_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL">; +def SIImage_sample_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L">; +def SIImage_sample_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B">; +def SIImage_sample_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL">; +def SIImage_sample_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ">; +def SIImage_sample_c_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD">; +def SIImage_sample_c_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL">; + +// Sample with offsets. +def SIImage_sample_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_O">; +def SIImage_sample_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL_O">; +def SIImage_sample_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_O">; +def SIImage_sample_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL_O">; +def SIImage_sample_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L_O">; +def SIImage_sample_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_O">; +def SIImage_sample_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL_O">; +def SIImage_sample_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ_O">; +def SIImage_sample_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_O">; +def SIImage_sample_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL_O">; + +// Sample with comparison and offsets. 
+def SIImage_sample_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_O">; +def SIImage_sample_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL_O">; +def SIImage_sample_c_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_O">; +def SIImage_sample_c_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O">; +def SIImage_sample_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L_O">; +def SIImage_sample_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_O">; +def SIImage_sample_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O">; +def SIImage_sample_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ_O">; +def SIImage_sample_c_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_O">; +def SIImage_sample_c_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O">; + +// Basic gather4. +def SIImage_gather4 : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4">; +def SIImage_gather4_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL">; +def SIImage_gather4_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L">; +def SIImage_gather4_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B">; +def SIImage_gather4_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL">; +def SIImage_gather4_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ">; + +// Gather4 with comparison. +def SIImage_gather4_c : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C">; +def SIImage_gather4_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL">; +def SIImage_gather4_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L">; +def SIImage_gather4_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B">; +def SIImage_gather4_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL">; +def SIImage_gather4_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ">; + +// Gather4 with offsets. 
+def SIImage_gather4_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_O">; +def SIImage_gather4_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL_O">; +def SIImage_gather4_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L_O">; +def SIImage_gather4_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_O">; +def SIImage_gather4_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL_O">; +def SIImage_gather4_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ_O">; + +// Gather4 with comparison and offsets. +def SIImage_gather4_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_O">; +def SIImage_gather4_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL_O">; +def SIImage_gather4_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L_O">; +def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O">; +def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">; +def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">; + class SDSample<string opcode> : SDNode <opcode, SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>, SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> |

