diff options
| author | Tim Renouf <tpr.llvm@botech.co.uk> | 2019-04-17 14:04:31 +0000 |
|---|---|---|
| committer | Tim Renouf <tpr.llvm@botech.co.uk> | 2019-04-17 14:04:31 +0000 |
| commit | 59e8bd3093dc1c2e877c206a3c29e3150a7878e0 (patch) | |
| tree | 6435d7ada9f19197bcbe83c7e737c428891f9c39 /llvm | |
| parent | 272f15abc3894de699cabd9cf6f49f577878c0f3 (diff) | |
| download | bcm5719-llvm-59e8bd3093dc1c2e877c206a3c29e3150a7878e0.tar.gz bcm5719-llvm-59e8bd3093dc1c2e877c206a3c29e3150a7878e0.zip | |
[AMDGPU] Flag new raw/struct atomic ops as source of divergence
Differential Revision: https://reviews.llvm.org/D60731
Change-Id: I821d93dec8b9cdd247b8172d92fb5e15340a9e7d
llvm-svn: 358579
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td | 22 | ||||
| -rw-r--r-- | llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll | 200 |
2 files changed, 222 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 916b1efffd0..c376d2ede5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -71,6 +71,28 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>; def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>; def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>; def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>; +def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>; +def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>; def : SourceOfDivergence<int_amdgcn_ps_live>; def : SourceOfDivergence<int_amdgcn_ds_swizzle>; def : SourceOfDivergence<int_amdgcn_ds_ordered_add>; diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll index 34ceb3580c5..5bc5fe34cda 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -88,6 +88,182 @@ main_body: ret float %r } +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32( +define float @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32( +define float @raw_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32( +define float @raw_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32( +define float @raw_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32( +define float @raw_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32( +define float @raw_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32( +define float @raw_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32( +define float @raw_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32( +define float @raw_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32( +define float @raw_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32( +define float @raw_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32( +define float @struct_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32( +define float @struct_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32( +define float @struct_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32( +define float @struct_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32( +define float @struct_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32( +define float @struct_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32( +define float @struct_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32( +define float @struct_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32( +define float @struct_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32( +define float @struct_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32( +define float @struct_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 { +main_body: + %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %r = bitcast i32 %orig to float + ret float %r +} + declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0 @@ -100,4 +276,28 @@ declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0 + +declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0 + attributes #0 = { nounwind } |

