diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-01-02 16:45:33 -0500 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-09 19:52:24 -0500 |
| commit | 35c3d101aee240f6c034f25ff6800fda22a89987 (patch) | |
| tree | 05d1393ef43cbbdfdd2a63d6ed304e06b87876bc /llvm/test/CodeGen/AMDGPU | |
| parent | 5cabb8357aeb3bbecaef4825c3a594f86ef94c8d (diff) | |
| download | bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.tar.gz bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.zip | |
AMDGPU/GlobalISel: Select G_EXTRACT_VECTOR_ELT
Unlike the DAG path, this does not yet try to fold an add-of-constant in
the index into the base register.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll | 1289 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir | 810 |
2 files changed, 2099 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll new file mode 100644 index 00000000000..4f9d35dd905 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -0,0 +1,1289 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s + +define float @dyn_extract_v8f32_const_s_v(i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b32 s4, 1.0 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 +; GPRIDX-NEXT: s_mov_b64 s[12:13], exec +; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0 +; GPRIDX-NEXT: s_mov_b32 m0, s14 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 +; GPRIDX-NEXT: s_movrels_b32 s14, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s14 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB0_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[12:13] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8f32_const_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b32 s4, 1.0 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: 
s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 +; MOVREL-NEXT: s_mov_b64 s[12:13], exec +; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s14, v0 +; MOVREL-NEXT: s_mov_b32 m0, s14 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 +; MOVREL-NEXT: s_movrels_b32 s14, s4 +; MOVREL-NEXT: v_mov_b32_e32 v1, s14 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB0_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[12:13] +; MOVREL-NEXT: v_mov_b32_e32 v0, v1 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s4, 1.0 +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 +; GPRIDX-NEXT: s_movrels_b32 s0, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f32_const_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s4, 1.0 +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 +; MOVREL-NEXT: s_movrels_b32 s0, s4 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + 
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b64 s[8:9], exec +; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 +; GPRIDX-NEXT: s_movrels_b32 s10, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s10 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB2_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[8:9] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f32_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b64 s[8:9], exec +; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v0 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 +; MOVREL-NEXT: s_movrels_b32 s10, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s10 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB2_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[8:9] +; MOVREL-NEXT: v_mov_b32_e32 v0, 
v1 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <8 x float> %vec, i32 %sel + ret float %ext +} + +define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB3_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8f32_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 +; MOVREL-NEXT: v_movrels_b32_e32 v9, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB3_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v9 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f32_v_s: +; 
MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <8 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f32_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <8 x float> %vec, i32 %sel + ret float %ext +} + +define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 +; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 +; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 +; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 +; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 +; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 +; GPRIDX-NEXT: s_mov_b64 s[20:21], exec +; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s22, 
v0 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 +; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1 +; GPRIDX-NEXT: s_add_u32 s23, s22, 1 +; GPRIDX-NEXT: s_mov_b32 m0, s22 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movrels_b32 s22, s4 +; GPRIDX-NEXT: s_mov_b32 m0, s23 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movrels_b32 s23, s4 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB6_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[20:21] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s22 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s23 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8i64_const_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], 1 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2 +; MOVREL-NEXT: s_mov_b64 s[8:9], 3 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4 +; MOVREL-NEXT: s_mov_b64 s[12:13], 5 +; MOVREL-NEXT: s_mov_b64 s[14:15], 6 +; MOVREL-NEXT: s_mov_b64 s[16:17], 7 +; MOVREL-NEXT: s_mov_b64 s[18:19], 8 +; MOVREL-NEXT: s_mov_b64 s[20:21], exec +; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 +; MOVREL-NEXT: s_lshl_b32 s22, s22, 1 +; MOVREL-NEXT: s_add_u32 s23, s22, 1 +; MOVREL-NEXT: s_mov_b32 m0, s22 +; MOVREL-NEXT: s_movrels_b32 s22, s4 +; MOVREL-NEXT: s_mov_b32 m0, s23 +; MOVREL-NEXT: s_movrels_b32 s23, s4 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB6_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[20:21] +; MOVREL-NEXT: v_mov_b32_e32 v0, s22 +; MOVREL-NEXT: v_mov_b32_e32 v1, s23 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel + ret i64 %ext +} + +define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { +; GPRIDX-LABEL: 
dyn_extract_v8i64_const_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 +; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 +; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 +; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 +; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 +; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8i64_const_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b64 s[4:5], 1 +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2 +; MOVREL-NEXT: s_mov_b64 s[8:9], 3 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4 +; MOVREL-NEXT: s_mov_b64 s[12:13], 5 +; MOVREL-NEXT: s_mov_b64 s[14:15], 6 +; MOVREL-NEXT: s_mov_b64 s[16:17], 7 +; MOVREL-NEXT: s_mov_b64 s[18:19], 8 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel + store i64 %ext, i64 addrspace(1)* undef + ret void +} + +define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8i64_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: 
s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b64 s[16:17], exec +; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 +; GPRIDX-NEXT: s_lshl_b32 s18, s18, 1 +; GPRIDX-NEXT: s_add_u32 s19, s18, 1 +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movrels_b32 s18, s0 +; GPRIDX-NEXT: s_mov_b32 m0, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s18 +; GPRIDX-NEXT: s_movrels_b32 s19, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s19 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB8_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[16:17] +; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[1:2], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8i64_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b64 s[16:17], exec +; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s18, v0 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 +; MOVREL-NEXT: s_lshl_b32 s18, s18, 1 +; MOVREL-NEXT: s_add_u32 s19, s18, 1 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_movrels_b32 s18, s0 +; MOVREL-NEXT: s_mov_b32 m0, s19 +; MOVREL-NEXT: s_movrels_b32 s19, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s18 +; MOVREL-NEXT: v_mov_b32_e32 v2, s19 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: 
s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB8_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[16:17] +; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i64> %vec, i32 %sel + store i64 %ext, i64 addrspace(1)* undef + ret void +} + +define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8i64_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 +; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 +; GPRIDX-NEXT: s_add_u32 s7, s6, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v18, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB9_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8i64_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 +; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: s_add_u32 s7, s6, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: s_mov_b32 m0, s7 +; MOVREL-NEXT: v_movrels_b32_e32 v18, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB9_1 +; MOVREL-NEXT: ; 
%bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v17 +; MOVREL-NEXT: v_mov_b32_e32 v1, v18 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x i64> %vec, i32 %sel + ret i64 %ext +} + +define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8i64_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: s_add_u32 s1, s0, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8i64_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 s0, s2, 1 +; MOVREL-NEXT: s_mov_b32 m0, s0 +; MOVREL-NEXT: s_add_u32 s0, s0, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 +; MOVREL-NEXT: s_mov_b32 m0, s0 +; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i64> %vec, i32 %sel + store i64 %ext, i64 addrspace(1)* undef + ret void +} + +define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8i64_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 
s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8i64_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i64> %vec, i32 %sel + store i64 %ext, i64 addrspace(1)* undef + ret void +} + +define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_add_u32 m0, s10, 3 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_add_u32 m0, s10, 3 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: 
s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 3 + %ext = extractelement <8 x float> %vec, i32 %add + ret float %ext +} + +define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8 +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB13_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8 +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 +; MOVREL-NEXT: v_movrels_b32_e32 v8, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB13_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v8 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %add = add i32 %sel, 3 + %ext = extractelement <8 x float> %vec, i32 %add + ret float 
%ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 1 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 1 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 1 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 2 +; 
GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 2 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 2 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, 
s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 3 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 4 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: 
dyn_extract_v8f64_s_s_offset4: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 4 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 4 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 5 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 5 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, 
s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 5 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 6 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 6 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: 
s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 6 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, 7 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, 7 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 7 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double 
@dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_add_u32 m0, s18, -1 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_add_u32 m0, s18, -1 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, -1 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16 +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: 
BB22_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 +; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 +; GPRIDX-NEXT: s_add_u32 s7, s6, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB22_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v17 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16 +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v18 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 +; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: s_add_u32 s7, s6, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 +; MOVREL-NEXT: s_mov_b32 m0, s7 +; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB22_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v16 +; MOVREL-NEXT: v_mov_b32_e32 v1, v17 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %add = add i32 %sel, 3 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { +; GPRIDX-LABEL: dyn_extract_v8p3_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 
s[4:5], exec +; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB23_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8p3_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 +; MOVREL-NEXT: v_movrels_b32_e32 v9, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB23_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v9 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx + ret i8 addrspace(3)* %ext +} + +define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_extract_v8p3_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ds_write_b32 v0, v0 +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8p3_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 
s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: s_mov_b32 m0, -1 +; MOVREL-NEXT: ds_write_b32 v0, v0 +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx + store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef + ret void +} + +define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { +; GPRIDX-LABEL: dyn_extract_v8p1_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 +; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 +; GPRIDX-NEXT: s_add_u32 s7, s6, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v18, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB25_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v8p1_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 +; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 +; MOVREL-NEXT: s_mov_b32 m0, 
s6 +; MOVREL-NEXT: s_add_u32 s7, s6, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: s_mov_b32 m0, s7 +; MOVREL-NEXT: v_movrels_b32_e32 v18, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB25_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v17 +; MOVREL-NEXT: v_mov_b32_e32 v1, v18 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx + ret i8 addrspace(1)* %ext +} + +define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_extract_v8p1_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v8p1_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: 
s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx + store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir new file mode 100644 index 00000000000..f2d53090f87 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -0,0 +1,810 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s +# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s + +--- +name: extract_vector_elt_s_s32_v2s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v2s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: 
extract_vector_elt_s_s32_v2s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: extract_vector_elt_s_s32_v3s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr3 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s32_v4s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s32) = COPY $sgpr4 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s32_v8s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s32_v16s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s32_v32s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(s32) = COPY $sgpr40 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s64_v2s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s32) = COPY $sgpr4 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s64_v4s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s64_v8s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s64_v16s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(s32) = COPY $sgpr40 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 -1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_7 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 7 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+
+---
+# Scalar extract from <8 x s32> with index (s + 8). The constant offset equals
+# the vector length (out of bounds for any in-range s); selection still emits
+# the plain add + S_MOVRELS_B32 sequence rather than anything special.
+name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 8
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element extract from <8 x s64> with index (s + 1): selected as
+# S_MOVRELS_B64 relative to sub0_sub1. The index source $sgpr8 is a
+# sub-register of the live-in 16-sgpr tuple.
+name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 1
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element extract from <8 x s64> with index (s + 2): same add + m0 +
+# S_MOVRELS_B64 selection as the offset-1 case, with the constant 2.
+name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 2
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element extract from <8 x s64> with index (s + -1): the negative
+# constant (printed as 4294967295) goes through the add into m0; no folding
+# into the subregister offset of S_MOVRELS_B64.
+name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 -1
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR vector, SGPR index, <2 x s32>: fiji selects V_MOVRELS_B32_e32 via m0;
+# gfx900 brackets a V_MOV_B32 with S_SET_GPR_IDX_ON/OFF.
+name: extract_vector_elt_v_s32_v2s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr2
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v2s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector, SGPR index, <3 x s32>: fiji uses V_MOVRELS_B32_e32 via m0;
+# gfx900 uses S_SET_GPR_IDX_ON/OFF around V_MOV_B32.
+# Fix: the livein list declared $sgpr3, but the body (and both CHECK blocks)
+# read $sgpr2; with register liveness tracked, reading a physreg that is not a
+# block livein is a machine-verifier error, so the livein now matches the body.
+name: extract_vector_elt_v_s32_v3s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $sgpr2
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v3s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector, SGPR index, <4 x s32>: V_MOVRELS_B32_e32 via m0 on fiji;
+# S_SET_GPR_IDX_ON/OFF + V_MOV_B32 on gfx900.
+name: extract_vector_elt_v_s32_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:sgpr(s32) = COPY $sgpr4
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector, SGPR index, <8 x s32>: V_MOVRELS_B32_e32 via m0 on fiji;
+# S_SET_GPR_IDX_ON/OFF + V_MOV_B32 on gfx900.
+# Fix: the livein list declared $vgpr8, but the body reads the index from
+# $sgpr8 (as both CHECK blocks show); declare the register that is actually
+# read so the machine verifier's livein check is satisfied.
+name: extract_vector_elt_v_s32_v8s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector, SGPR index, <16 x s32>: V_MOVRELS_B32_e32 via m0 on fiji;
+# S_SET_GPR_IDX_ON/OFF + V_MOV_B32 on gfx900.
+# Fix: the body reads the index from $sgpr8 but the livein list only named the
+# vgpr0-15 tuple; add $sgpr8 so the read register is declared live-in.
+name: extract_vector_elt_v_s32_v16s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v16s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector, SGPR index, <32 x s32> (1024-bit register class): same
+# V_MOVRELS_B32_e32 / S_SET_GPR_IDX selection as the smaller vector cases.
+name: extract_vector_elt_v_s32_v32s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v32s32
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ %1:sgpr(s32) = COPY $sgpr40
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR vector with index (s + 1): the S_ADD_U32 result feeds m0 (fiji) or
+# S_SET_GPR_IDX_ON (gfx900); the constant offset is not folded away.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 1
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR vector with index (s + -1): the negative constant (4294967295) goes
+# through S_ADD_U32 into the indexing mode; no subregister-offset folding.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 -1
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR vector with index (s + 7), the last in-bounds constant offset: the add
+# is still materialized and fed to the indexing mode.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 7
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR vector with index (s + 8): constant offset equal to the vector length
+# (out of bounds for any in-range s); selection still emits add + indexed move.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_CONSTANT i32 8
+ %3:sgpr(s32) = G_ADD %1, %2
+ %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+ S_ENDPGM 0, implicit %4
+...

