diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-09 16:02:07 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-09 16:02:07 +0000 |
commit | ebbd6e49768271297d17bcecd22eae2128e24e26 (patch) | |
tree | 26eec1b36481602683fafa9cd7c3654aa3a1e7f9 | |
parent | c34b4036ffe115c7cc03b9236922e98b78adb8b1 (diff) | |
download | bcm5719-llvm-ebbd6e49768271297d17bcecd22eae2128e24e26.tar.gz bcm5719-llvm-ebbd6e49768271297d17bcecd22eae2128e24e26.zip |
AMDGPU: Remove code address space predicates
Fixes 8-byte, 8-byte aligned LDS loads. The 16-byte case is still broken
because it is not reported as legal.
llvm-svn: 371413
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 68 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir | 42 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir | 304 | ||||
-rw-r--r-- | llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 1 |
6 files changed, 400 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 02e38cd96f8..769e56b4b09 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -69,8 +69,14 @@ def gi_ds_1addr_1offset : // SelectionDAG. The GISel selector can just insert m0 initialization // directly before before selecting a glue-less load, so hide this // distinction. -def : GINodeEquiv<G_LOAD, AMDGPUld_glue>; -def : GINodeEquiv<G_STORE, AMDGPUst_glue>; +def : GINodeEquiv<G_LOAD, AMDGPUld_glue> { + let CheckMMOIsNonAtomic = 1; +} + +def : GINodeEquiv<G_STORE, AMDGPUst_glue> { + let CheckMMOIsNonAtomic = 1; +} + def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>; def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap_glue>; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 6960727d882..e1d53ae71a8 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -714,8 +714,8 @@ foreach vt = VGPR_32.RegTypes in { defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">; } -defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local">; -defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local">; +defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">; +defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">; let OtherPredicates = [D16PreservesUnusedBits] in { def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index e63923b89d1..9fda47d541f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -441,19 +441,32 @@ def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr) def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; } -def load_align8_local_m0 : LoadFrag <load_glue>, LocalAddress { - let 
MinAlignment = 8; +def load_align8_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; let IsNonExtLoad = 1; + let MinAlignment = 8; } -def load_align16_local_m0 : LoadFrag <load_glue>, LocalAddress { - let MinAlignment = 16; +def load_align16_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; let IsNonExtLoad = 1; + let MinAlignment = 16; } } // End IsLoad = 1 -def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress; -def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress; +let AddressSpaces = LoadAddress_local.AddrSpaces in { + +def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_32_glue node:$ptr)> { + let IsAtomic = 1; +} +def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_64_glue node:$ptr)> { + let IsAtomic = 1; +} +} // End let AddressSpaces = LoadAddress_local.AddrSpaces def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore, @@ -464,10 +477,6 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] >; -def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val), - (AMDGPUatomic_st_glue node:$ptr, node:$val)> { -} - def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), (AMDGPUst_glue node:$val, node:$ptr)> { let IsStore = 1; @@ -499,13 +508,8 @@ def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), } let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { -def store_glue_align8 : PatFrag<(ops node:$val, node:$ptr), - (store_glue node:$val, node:$ptr)>, Aligned<8>; -def store_glue_align16 : PatFrag<(ops node:$val, node:$ptr), - (store_glue node:$val, node:$ptr)>, Aligned<16>; - def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr)> { + (store_glue node:$val, node:$ptr)> { let IsStore = 1; let IsTruncStore = 0; } @@ -516,23 +520,45 @@ def truncstorei8_local_m0 : PatFrag<(ops 
node:$val, node:$ptr), let MemoryVT = i8; } - def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), (unindexedstore_glue node:$val, node:$ptr)> { let IsStore = 1; let MemoryVT = i16; } +} -// FIXME: atomic store doesn't work. -def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress; -def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress { +def store_align16_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; let IsTruncStore = 0; + let MinAlignment = 16; } -def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress { +def store_align8_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; let IsTruncStore = 0; + let MinAlignment = 8; +} + +let AddressSpaces = StoreAddress_local.AddrSpaces in { + +def atomic_store_local_32_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i32; } +def atomic_store_local_64_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i64; } +} // End let AddressSpaces = StoreAddress_local.AddrSpaces + def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir index f267163206f..0f7689cfc99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -215,7 +215,7 @@ body: | --- -name: load_local_v4s32 +name: load_local_v4s32_align16 legalized: true regBankSelected: true tracksRegLiveness: true @@ -224,19 +224,53 @@ body: | bb.0: liveins: $vgpr0 - ; GFX6-LABEL: name: load_local_v4s32 + ; GFX6-LABEL: name: load_local_v4s32_align16 + ; GFX6: liveins: $vgpr0 + ; 
GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] + ; GFX7-LABEL: name: load_local_v4s32_align16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] + ; GFX9-LABEL: name: load_local_v4s32_align16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 16, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_local_v4s32_align_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v4s32_align_4 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX7-LABEL: name: load_local_v4s32 + ; GFX7-LABEL: name: load_local_v4s32_align_4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-LABEL: name: load_local_v4s32 + ; GFX9-LABEL: name: load_local_v4s32_align_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir index 8dd906172cc..449a3e5f725 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -260,3 +260,307 @@ body: | G_STORE %1, %0 :: (store 1, align 1, addrspace 3) ... 
+ +--- + +name: store_local_s64_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_s64_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_s64_align4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_s64_align4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + +... 
+ +--- + +name: store_local_p1_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_p1_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_p1_align4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_p1_align4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + +... 
+ +--- + +name: store_local_v2s32_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_v2s32_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_v2s32_align4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_v2s32_align4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + +... 
+ +--- + +name: store_local_v4s16_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_v4s16_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_v4s16_align4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_v4s16_align4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + +... 
+ +--- + +name: store_local_s64_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_s64_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7-LABEL: name: store_local_s64_align8 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9-LABEL: name: store_local_s64_align8 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + +... 
+ +--- + +name: store_local_p1_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_p1_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7-LABEL: name: store_local_p1_align8 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9-LABEL: name: store_local_p1_align8 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + +... 
+ +--- + +name: store_local_v2s32_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_v2s32_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7-LABEL: name: store_local_v2s32_align8 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9-LABEL: name: store_local_v2s32_align8 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + +... 
+ +--- + +name: store_local_v4s16_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_v4s16_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7-LABEL: name: store_local_v4s16_align8 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9-LABEL: name: store_local_v4s16_align8 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) + %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + +... diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index c30186008df..07fffaec73d 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -919,6 +919,7 @@ std::string TreePredicateFn::getPredCode() const { if (isAtomic()) { if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() && + getAddressSpaces() == nullptr && !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() && !isAtomicOrderingSequentiallyConsistent() && |