diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 22 |
2 files changed, 26 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index ef64c481b9b..50089498289 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1534,7 +1534,10 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>; + +foreach vt = [i32, f32, v2i16, v2f16, p2, p3, p5, p6] in { defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>; +} defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>; @@ -1609,7 +1612,11 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>; -defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>; + +foreach vt = [i32, f32, v2i16, v2f16, p2, p3, p5, p6] in { +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, vt, store_private>; +} + defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>; defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>; defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 1c1d81a6c66..53e567a69c2 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -639,13 +639,20 @@ defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">; defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">; defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">; defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">; -defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">; + +foreach vt = [i32, f32, v2i16, v2f16, p2, p3, p5, p6] in { +defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">; +} + defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">; defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">; let AddedComplexity = 100 in { -defm : DSReadPat_mc <DS_READ_B64, v2i32, "load_align8_local">; +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">; +} + defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">; } // End AddedComplexity = 100 @@ -702,7 +709,11 @@ defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">; defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">; defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">; defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">; -defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">; + +foreach vt = VGPR_32.RegTypes in { +defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">; +} + defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local">; defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local">; @@ -739,7 +750,10 @@ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, store_local>; let AddedComplexity = 100 in { -defm : DSWritePat_mc <DS_WRITE_B64, v2i32, "store_align8_local">; +foreach vt = VReg_64.RegTypes in { +defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">; +} + defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">; } // End AddedComplexity = 100 |

