diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:09:07 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:09:07 +0000 |
| commit | 5565f65e1398ff7633ba7e52d80f6b02066b8f33 (patch) | |
| tree | 10dc9d7ede00f08946bf6799461d7941bc7fee94 /llvm/test | |
| parent | bf8694d36d8e7f5a5b459a0fb134e4f84daa6376 (diff) | |
| download | bcm5719-llvm-5565f65e1398ff7633ba7e52d80f6b02066b8f33.tar.gz bcm5719-llvm-5565f65e1398ff7633ba7e52d80f6b02066b8f33.zip | |
R600: Add dag combine for BFE
llvm-svn: 209461
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 248 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 419 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/sext-in-reg.ll | 84 |
3 files changed, 751 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index b3fec06f18d..71d2b6e7c2e 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -48,3 +48,251 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_0 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_2 +; SI-NOT: BFE +; SI: 
V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_4 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_7 +; 
SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_9 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: 
@bfe_i32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + 
ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_17 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_18 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index 0d478638219..6ed1ad5d2e6 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -38,3 +38,422 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zextload_i8 +; SI: 
BUFFER_LOAD_UBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %load = load i8 addrspace(1)* %in + %ext = zext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 
addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: S_ENDPGM +; EG: BFE_UINT +define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + 
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_4 +; SI-NOT: LSHL +; SI-NOT: SHR +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = lshr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_5 +; SI: BUFFER_LOAD_DWORD +; SI-NOT: LSHL +; SI-NOT: SHR +; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = ashr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_6 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 1, 31 +; SI: S_ENDPGM +define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_7 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_8 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: 
V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 31, 1 +; SI: S_ENDPGM +define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_0 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_2 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_4 +; SI-NOT: BFE 
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_7 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_9 
+; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; 
FUNC-LABEL: @bfe_u32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/R600/sext-in-reg.ll b/llvm/test/CodeGen/R600/sext-in-reg.ll index d2ab0b92879..404c9b8b812 100644 --- a/llvm/test/CodeGen/R600/sext-in-reg.ll +++ b/llvm/test/CodeGen/R600/sext-in-reg.ll @@ -382,10 +382,57 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone +; FUNC-LABEL: @bfe_0_width +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_8 +; SI: V_BFE_I32 +; SI-NOT: BFE 
+; SI: S_ENDPGM +define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_16 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI: S_ENDPGM +define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; This really should be folded into 1 +; FUNC-LABEL: @bfe_16_bfe_8 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI: S_ENDPGM +define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + ; Make sure there isn't a redundant BFE ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe ; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000 ; SI-NOT: BFE +; SI: S_ENDPGM define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %c = add i32 %a, %b ; add to prevent folding into extload %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone @@ -394,3 +441,40 @@ define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) n store i32 %ashr, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong +define void 
@sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b ; add to prevent folding into extload + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sextload_i8_to_i32_bfe +; SI: BUFFER_LOAD_SBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sextload_i8_to_i32_bfe_0: +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} |

