diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-03-17 15:17:41 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-03-17 15:17:41 +0000 |
| commit | 85803366d6dac06f32dee66c9fcd17b51cf3b3e3 (patch) | |
| tree | 9c9a585c58a8fc31602530a5ef5474ab8617838c | |
| parent | 7e71129be40ab6ba56e6993da8f1646cd5061cf2 (diff) | |
| download | bcm5719-llvm-85803366d6dac06f32dee66c9fcd17b51cf3b3e3.tar.gz bcm5719-llvm-85803366d6dac06f32dee66c9fcd17b51cf3b3e3.zip | |
AMDGPU/GlobalISel: Basic legality for load/store
llvm-svn: 327772
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 53 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir | 131 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir | 122 |
3 files changed, 292 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index de8147d093f..283de3b31da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,6 +14,7 @@ #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" +#include "AMDGPUTargetMachine.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -27,12 +28,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST, const GCNTargetMachine &TM) { using namespace TargetOpcode; - const LLT S1= LLT::scalar(1); + auto GetAddrSpacePtr = [&TM](unsigned AS) { + return LLT::pointer(AS, TM.getPointerSizeInBits(AS)); + }; + + const LLT S1 = LLT::scalar(1); const LLT V2S16 = LLT::vector(2, 16); + const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); - const LLT P1 = LLT::pointer(AMDGPUAS::GLOBAL_ADDRESS, 64); - const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + + const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS); + const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS); + setAction({G_ADD, S32}, Legal); setAction({G_MUL, S32}, Legal); @@ -76,26 +84,45 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST, setAction({G_FPTOUI, S32}, Legal); setAction({G_FPTOUI, 1, S32}, Legal); - setAction({G_GEP, P1}, Legal); - setAction({G_GEP, P2}, Legal); + setAction({G_GEP, GlobalPtr}, Legal); + setAction({G_GEP, ConstantPtr}, Legal); setAction({G_GEP, 1, S64}, Legal); setAction({G_ICMP, S1}, Legal); setAction({G_ICMP, 1, S32}, Legal); - setAction({G_LOAD, P1}, Legal); - setAction({G_LOAD, P2}, Legal); - setAction({G_LOAD, S32}, Legal); - setAction({G_LOAD, 1, P1}, Legal); - setAction({G_LOAD, 1, P2}, Legal); + + getActionDefinitionsBuilder({G_LOAD, G_STORE}) + .legalIf([=, &ST](const LegalityQuery &Query) { + const LLT &Ty0 = Query.Types[0]; + + // TODO: Decompose private loads into 4-byte 
components. + // TODO: Illegal flat loads on SI + switch (Ty0.getSizeInBits()) { + case 32: + case 64: + case 128: + return true; + + case 96: + // XXX hasLoadX3 + return (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS); + + case 256: + case 512: + // TODO: constant loads + default: + return false; + } + }); + + setAction({G_SELECT, S32}, Legal); setAction({G_SELECT, 1, S1}, Legal); setAction({G_SHL, S32}, Legal); - setAction({G_STORE, S32}, Legal); - setAction({G_STORE, 1, P1}, Legal); // FIXME: When RegBankSelect inserts copies, it will only create new // registers with scalar types. This means we can end up with @@ -104,8 +131,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST, // if it sees a generic instruction which isn't legal, so we need to // tell it that scalar types are legal for pointer operands setAction({G_GEP, S64}, Legal); - setAction({G_LOAD, 1, S64}, Legal); - setAction({G_STORE, 1, S64}, Legal); for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) { getActionDefinitionsBuilder(Op) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir new file mode 100644 index 00000000000..a058530dbb9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir @@ -0,0 +1,131 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name: test_load_global_i32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1) + + $vgpr0 = COPY %1 +... 
+ +--- +name: test_load_global_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_LOAD %0 :: (load 8, addrspace 1) + + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_load_global_p1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_p1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1) + + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_load_global_p4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_p4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1) + + $vgpr0_vgpr1 = COPY %1 +... + + +--- +name: test_load_global_p3 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_p3 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](p3) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1) + + $vgpr0 = COPY %1 +... 
+ +--- +name: test_load_global_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1) + + $vgpr0_vgpr1 = COPY %1 +... + +--- + +name: test_load_global_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_load_global_v3i32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_load_global_v3i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir new file mode 100644 index 00000000000..8d6696fe823 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -0,0 +1,122 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name: test_store_global_i32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_store_global_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, addrspace 1) +... + +--- +name: test_store_global_i64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_store_global_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, addrspace 1) +... + +--- +name: test_store_global_p1 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_store_global_p1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p1) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, addrspace 1) +... 
+ +--- +name: test_store_global_p4 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_store_global_p4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3 + ; CHECK: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p4) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, addrspace 1) +... + +--- +name: test_store_global_p3 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_store_global_p3 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p3) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, addrspace 1) +... + +--- +name: test_store_global_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_store_global_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, addrspace 1) +... + +--- +name: test_store_global_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_store_global_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, addrspace 1) +... 
+ +--- +name: test_store_global_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; CHECK-LABEL: name: test_store_global_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 + ; CHECK: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 4, addrspace 1) +... |

