diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:15:22 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:15:22 +0000 |
| commit | c2ff0eb69762f0c87545b74c89d99cfdbb0913e9 (patch) | |
| tree | 23670976c67603007a5f4b62773205d746b8b9dc /llvm/test | |
| parent | c10c33444e9c02124eb39d1521646ee1bc8a5525 (diff) | |
| download | bcm5719-llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.gz bcm5719-llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.zip | |
AMDGPU/SI: Improve SILoadStoreOptimizer and run it before the scheduler
Summary:
The SILoadStoreOptimizer can now look ahead more then one instruction when
looking for instructions to merge, which greatly improves the number of
loads/stores that we are able to merge.
Moving the pass before scheduling avoids increasing register pressure after
the scheduler, so that the scheduler's register pressure estimates will be
more accurate. It also gives more consistent results, since it is no longer
affected by minor scheduling changes.
Reviewers: arsenm
Subscribers: arsenm, kzhuravl, llvm-commits
Differential Revision: https://reviews.llvm.org/D23814
llvm-svn: 279991
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds_write2.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fceil64.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i16.ll | 54 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i32.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/store-v3i64.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 6 |
10 files changed, 61 insertions, 39 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll b/llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll index d62782e5ec7..dab3c10d682 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll @@ -9,8 +9,8 @@ ; SI-LABEL: {{^}}offset_order: ; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3 -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:12 offset1:14 -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:44 +; SI-DAG: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:12 offset1:14 +; SI-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:44 define void @offset_order(float addrspace(1)* %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index 45fcc01b2ad..ae230dac937 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -179,8 +179,8 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add } ; SI-LABEL: @simple_write2_two_val_f32_x2 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset1:11 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27 +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8 +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -209,8 +209,8 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac } ; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset0:3 offset1:11 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27 +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8 +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll index fb5853b808e..f6cdbb48c75 100644 --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -13,8 +13,8 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: s_add_i32 [[SEXP1:s[0-9]+]], [[SEXP]], 0xfffffc01 -; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP1]] +; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01 +; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]] ; SI-DAG: s_not_b64 ; SI-DAG: s_and_b64 ; SI-DAG: cmp_gt_i32 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll index 73a5c54e175..4c559f42d42 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll @@ -25,7 +25,8 @@ define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 { ; SI: v_rsq_clamp_f64_e32 ; TODO: this constant should be folded: -; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], -1 +; VI-DAG: s_mov_b32 [[NEG1:s[0-9+]]], -1 +; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], [[NEG1]] ; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff ; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff ; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll index ce311bfc1e6..9b0cbaa7701 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll @@ -73,8 +73,8 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i16: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} ; EG: LDS_READ_RET @@ -287,11 +287,9 @@ define void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1 ret void } -; FIXME: Should have 2 ds_read_b64 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+$}} -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} ; GCN: ds_write2_b64 ; GCN: ds_write2_b64 @@ -314,9 +312,9 @@ define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 } ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}} -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16{{$}} + +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -379,10 +377,18 @@ define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 } ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:5{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -407,17 +413,31 @@ define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 ret void } -; FIXME: Missed read2 ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:11 offset1:15 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:64 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:9 offset1:10 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:112 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 ; EG: LDS_READ_RET ; EG: LDS_READ_RET diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll index 03f0ffecb5d..271e563024c 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll @@ -56,10 +56,14 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i32: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:5 offset1:6{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:7{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { entry: %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll index f6c0e3c6239..a57e4f59532 100644 --- a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll +++ b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll @@ -44,8 +44,7 @@ entry: ; GCN-LABEL: {{^}}local_memory_two_objects: ; GCN: v_lshlrev_b32_e32 [[ADDRW:v[0-9]+]], 2, v0 -; CI-DAG: ds_write_b32 [[ADDRW]], {{v[0-9]*}} offset:16 -; CI-DAG: ds_write_b32 [[ADDRW]], {{v[0-9]*$}} +; CI-DAG: ds_write2_b32 [[ADDRW]], {{v[0-9]+}}, {{v[0-9]+}} offset1:4 ; SI: v_add_i32_e32 [[ADDRW_OFF:v[0-9]+]], vcc, 16, [[ADDRW]] diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 0e9618523e3..f6ed41d9dcd 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -156,7 +156,8 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, ; FUNC-LABEL: @reorder_local_offsets ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102 -; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400 +; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100 +; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: buffer_store_dword ; CI: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll index b4d7505e0a8..03ae01afbcd 100644 --- a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll @@ -46,8 +46,7 @@ define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64 } ; GCN-LABEL: {{^}}local_store_v3i64: -; GCN: ds_write_b64 -; GCN: ds_write_b64 +; GCN: ds_write2_b64 ; GCN: ds_write_b64 define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) { store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32 diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll index 0435ed4d552..7a72a4c7ba0 100644 --- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll +++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll @@ -42,12 +42,10 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa } ; GCN-LABEL: {{^}}test_use_s_v_s: -; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} - ; GCN: buffer_load_dword [[VA0:v[0-9]+]] -; GCN-NOT: v_mov_b32 ; GCN: buffer_load_dword [[VA1:v[0-9]+]] +; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} ; GCN-NOT: v_mov_b32 ; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] |

