; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s

define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT:    s_mov_b64 s[12:13], exec
; GPRIDX-NEXT:  BB0_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v0
; GPRIDX-NEXT:    s_mov_b32 m0, s14
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s14, v0
; GPRIDX-NEXT:    s_movrels_b32 s14, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s14
; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT:    s_cbranch_execnz BB0_1
; GPRIDX-NEXT:  ; %bb.2:
; GPRIDX-NEXT:    s_mov_b64 exec, s[12:13]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b32 s4, 1.0
; MOVREL-NEXT:    s_mov_b32 s5, 2.0
; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
; MOVREL-NEXT:    s_mov_b32 s7, 4.0
; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
; MOVREL-NEXT:    s_mov_b64 s[12:13], exec
; MOVREL-NEXT:  BB0_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT:    v_readfirstlane_b32 s14, v0
; MOVREL-NEXT:    s_mov_b32 m0, s14
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s14, v0
; MOVREL-NEXT:    s_movrels_b32 s14, s4
; MOVREL-NEXT:    v_mov_b32_e32 v1, s14
; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
; MOVREL-NEXT:    s_cbranch_execnz BB0_1
; MOVREL-NEXT:  ; %bb.2:
; MOVREL-NEXT:    s_mov_b64 exec, s[12:13]
; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
; GPRIDX-NEXT:    s_mov_b32 m0, s2
; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT:    s_movrels_b32 s0, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s4, 1.0
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b32 s5, 2.0
; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
; MOVREL-NEXT:    s_mov_b32 s7, 4.0
; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
; MOVREL-NEXT:    s_movrels_b32 s0, s4
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b64 s[8:9], exec
; GPRIDX-NEXT:  BB2_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v0
; GPRIDX-NEXT:    s_mov_b32 m0, s10
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s10, v0
; GPRIDX-NEXT:    s_movrels_b32 s10, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s10
; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT:    s_cbranch_execnz BB2_1
; GPRIDX-NEXT:  ; %bb.2:
; GPRIDX-NEXT:    s_mov_b64 exec, s[8:9]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b64 s[8:9], exec
; MOVREL-NEXT:  BB2_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT:    v_readfirstlane_b32 s10, v0
; MOVREL-NEXT:    s_mov_b32 m0, s10
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s10, v0
; MOVREL-NEXT:    s_movrels_b32 s10, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s10
; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
; MOVREL-NEXT:    s_cbranch_execnz BB2_1
; MOVREL-NEXT:  ; %bb.2:
; MOVREL-NEXT:    s_mov_b64 exec, s[8:9]
; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
; GPRIDX-NEXT:  BB3_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT:    v_mov_b32_e32 v9, v0
; GPRIDX-NEXT:    s_set_gpr_idx_off
; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT:    s_cbranch_execnz BB3_1
; GPRIDX-NEXT:  ; %bb.2:
; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v9
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
; MOVREL-NEXT:  BB3_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
; MOVREL-NEXT:    s_mov_b32 m0, s6
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT:    v_movrels_b32_e32 v9, v0
; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
; MOVREL-NEXT:    s_cbranch_execnz BB3_1
; MOVREL-NEXT:  ; %bb.2:
; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
; MOVREL-NEXT:    v_mov_b32_e32 v0, v9
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX:
; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f32_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 m0, s10 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_movrels_b32 s0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f32_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 m0, s10 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_movrels_b32 s0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 ; GPRIDX-NEXT: s_mov_b64 s[20:21], exec ; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 ; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1 ; GPRIDX-NEXT: s_add_u32 s23, s22, 1 ; GPRIDX-NEXT: s_mov_b32 m0, s22 ; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b32 s22, s4 ; GPRIDX-NEXT: s_mov_b32 m0, s23 ; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b32 s23, s4 ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB6_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[20:21] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s22 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s23 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8i64_const_s_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_mov_b64 s[4:5], 1 ; MOVREL-NEXT: s_mov_b64 s[6:7], 2 ; MOVREL-NEXT: s_mov_b64 s[8:9], 3 ; MOVREL-NEXT: s_mov_b64 s[10:11], 4 ; MOVREL-NEXT: s_mov_b64 s[12:13], 5 ; MOVREL-NEXT: s_mov_b64 s[14:15], 6 ; MOVREL-NEXT: s_mov_b64 s[16:17], 7 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 ; MOVREL-NEXT: s_mov_b64 s[20:21], exec ; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 ; MOVREL-NEXT: s_lshl_b32 s22, s22, 1 ; MOVREL-NEXT: s_add_u32 s23, s22, 1 ; MOVREL-NEXT: s_mov_b32 m0, s22 ; MOVREL-NEXT: s_movrels_b32 s22, s4 ; MOVREL-NEXT: s_mov_b32 m0, s23 ; 
; MOVREL-NEXT:    s_movrels_b32 s23, s4
; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
; MOVREL-NEXT:    s_cbranch_execnz BB6_1
; MOVREL-NEXT:  ; %bb.2:
; MOVREL-NEXT:    s_mov_b64 exec, s[20:21]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s22
; MOVREL-NEXT:    v_mov_b32_e32 v1, s23
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
; GPRIDX-NEXT:    s_mov_b32 m0, s2
; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    s_mov_b32 s12, s14
; GPRIDX-NEXT:    s_mov_b32 s13, s15
; GPRIDX-NEXT:    s_mov_b32 s14, s16
; GPRIDX-NEXT:    s_mov_b32 s15, s17
; GPRIDX-NEXT:    s_mov_b64 s[16:17], exec
; GPRIDX-NEXT:  BB8_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v0
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s18, v0
; GPRIDX-NEXT:    s_lshl_b32 s18, s18, 1
; GPRIDX-NEXT:    s_add_u32 s19, s18, 1
; GPRIDX-NEXT:    s_mov_b32 m0, s18
; GPRIDX-NEXT:    s_nop 0
; GPRIDX-NEXT:    s_movrels_b32 s18, s0
; GPRIDX-NEXT:    s_mov_b32 m0, s19
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s18
; GPRIDX-NEXT:    s_movrels_b32 s19, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v2, s19
; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT:    s_cbranch_execnz BB8_1
; GPRIDX-NEXT:  ; %bb.2:
; GPRIDX-NEXT:    s_mov_b64 exec, s[16:17]
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[1:2], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:
s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_mov_b64 s[16:17], exec ; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s18, v0 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 ; MOVREL-NEXT: s_lshl_b32 s18, s18, 1 ; MOVREL-NEXT: s_add_u32 s19, s18, 1 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_movrels_b32 s18, s0 ; MOVREL-NEXT: s_mov_b32 m0, s19 ; MOVREL-NEXT: s_movrels_b32 s19, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s18 ; MOVREL-NEXT: v_mov_b32_e32 v2, s19 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB8_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[16:17] ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 ; GPRIDX-NEXT: s_add_u32 s7, s6, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v18, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB9_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8i64_v_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 ; MOVREL-NEXT: s_mov_b32 m0, s6 ; MOVREL-NEXT: s_add_u32 s7, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: s_mov_b32 m0, s7 ; MOVREL-NEXT: v_movrels_b32_e32 v18, v0 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB9_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, v17 ; MOVREL-NEXT: v_mov_b32_e32 v1, v18 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> %vec, i32 %sel ret i64 %ext } define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_add_u32 s1, s0, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: 
s_lshl_b32 s0, s2, 1 ; MOVREL-NEXT: s_mov_b32 m0, s0 ; MOVREL-NEXT: s_add_u32 s0, s0, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 ; MOVREL-NEXT: s_mov_b32 m0, s0 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_add_u32 m0, s10, 3 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_movrels_b32 s0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_add_u32 m0, s10, 3 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_movrels_b32 s0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add ret float %ext } define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8 ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB13_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8 ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 ; MOVREL-NEXT: s_mov_b32 m0, s6 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 ; MOVREL-NEXT: v_movrels_b32_e32 v8, v0 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB13_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, v8 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add ret float %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 1 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 1 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 1 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 2 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 
s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 2 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 2 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 4 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; 
GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 4 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 4 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 5 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 5 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 5 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 6 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: 
s_add_u32 m0, s18, 6 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 6 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, 7 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, 7 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 7 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_add_u32 m0, s18, -1 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_add_u32 m0, s18, -1 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 
; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, -1 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16 ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 ; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 ; GPRIDX-NEXT: s_add_u32 s7, s6, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB22_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v17 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16 ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v18 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 ; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 ; MOVREL-NEXT: s_mov_b32 m0, s6 ; MOVREL-NEXT: s_add_u32 s7, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 ; MOVREL-NEXT: s_mov_b32 m0, s7 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB22_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, v16 ; MOVREL-NEXT: v_mov_b32_e32 v1, v17 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB23_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8p3_v_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 ; MOVREL-NEXT: s_mov_b32 m0, s6 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 ; 
MOVREL-NEXT: v_movrels_b32_e32 v9, v0 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB23_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, v9 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx ret i8 addrspace(3)* %ext } define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 m0, s10 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_movrels_b32 s0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ds_write_b32 v0, v0 ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p3_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 m0, s10 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_movrels_b32 s0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: s_mov_b32 m0, -1 ; MOVREL-NEXT: ds_write_b32 v0, v0 ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef ret void } define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 ; GPRIDX-NEXT: s_add_u32 s7, s6, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v18, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB25_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8p1_v_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 ; MOVREL-NEXT: s_mov_b32 m0, s6 ; MOVREL-NEXT: s_add_u32 s7, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: s_mov_b32 m0, s7 ; MOVREL-NEXT: v_movrels_b32_e32 v18, v0 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB25_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, v17 ; MOVREL-NEXT: v_mov_b32_e32 v1, v18 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx ret i8 addrspace(1)* %ext } 
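; Uniform vector and uniform index: the 64-bit pointer element is selected
; entirely on the SALU below, by loading the index into m0 and reading the
; value with s_movrels_b64 before the 64-bit store.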
define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p1_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef ret void }