| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:09:12 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:09:12 +0000 |
| commit | 05e96f444453bdde2d2b0b15967f6936f057be3f (patch) | |
| tree | 1ca8cca1dd5622bdcfccd371f8089fb94604fbe0 /llvm/test | |
| parent | 5565f65e1398ff7633ba7e52d80f6b02066b8f33 (diff) | |
R600: Try to convert BFE back to standard bit ops when possible.
This allows existing DAG combines to work on them, and then
we can re-match to BFE if necessary during instruction selection.
llvm-svn: 209462
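
For context, a bit-field extract (BFE) pulls `width` bits starting at bit `offset` out of a 32-bit value, either zero-extended (`llvm.AMDGPU.bfe.u32`) or sign-extended (`llvm.AMDGPU.bfe.i32`). The cases the tests below exercise are exactly the ones where a BFE degenerates into ordinary shifts and masks. A minimal C sketch of the reference semantics and of those degenerate cases (the helper names are illustrative, not LLVM code; assumes `1 <= width` and `offset + width <= 32`):

```c
#include <stdint.h>

/* Reference semantics of the 32-bit BFE intrinsics (illustrative helpers,
   not LLVM's implementation), assuming 1 <= width and offset + width <= 32. */
static uint32_t bfe_u32(uint32_t x, unsigned offset, unsigned width) {
    uint32_t mask = (width >= 32) ? 0xffffffffu : ((1u << width) - 1u);
    return (x >> offset) & mask;
}

static int32_t bfe_i32(int32_t x, unsigned offset, unsigned width) {
    /* Move the field to the top, then arithmetic-shift back down so the
       field's top bit becomes the sign bit (assumes the usual arithmetic
       right shift on signed int32_t). */
    return (int32_t)((uint32_t)x << (32u - offset - width)) >> (32 - width);
}

/* Degenerate cases the combine can re-expand to plain bit ops:
 *   offset == 0:           bfe_u32(x, 0, w) == x & ((1u << w) - 1)   (AND)
 *   offset + width == 32:  bfe_u32(x, o, w) == x >> o                (LSHR)
 *   offset + width == 32:  bfe_i32(x, o, w) == (int32_t)x >> o       (ASHR)
 */
```

Once a BFE is in shift/mask form, the generic DAG combines can fold it with neighboring shifts, and instruction selection can still re-form a `V_BFE` when the pattern survives.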
Diffstat (limited to 'llvm/test')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 128 |
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 105 |
| -rw-r--r-- | llvm/test/CodeGen/R600/sext-in-reg.ll | 48 |
| -rw-r--r-- | llvm/test/CodeGen/R600/udivrem64.ll | 6 |
4 files changed, 276 insertions, 11 deletions
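
Several of the sext-in-reg.ll and bfe.i32.ll hunks below hinge on sign-extension-in-register being the same thing as a shift pair: `bfe_i32(x, 0, w)` is `sext_inreg` from `w` bits, i.e. `(x << (32-w)) >> (32-w)` with an arithmetic right shift. A small self-contained C check of that identity (assumes arithmetic right shift on `int32_t`, which mainstream compilers provide; the helper name is ours, for illustration):

```c
#include <assert.h>
#include <stdint.h>

/* sext_inreg from the low 'bits' bits, written as the shl/ashr pair that
   bfe_i32(x, 0, bits) re-expands to in this patch (1 <= bits <= 31). */
static int32_t sext_inreg(int32_t x, unsigned bits) {
    unsigned s = 32u - bits;                  /* e.g. bits = 24 -> s = 8 */
    return (int32_t)((uint32_t)x << s) >> s;  /* shl, then arithmetic shr */
}

int main(void) {
    assert(sext_inreg(0x00ffffff, 24) == -1);  /* i24 -1 sign-extends */
    assert(sext_inreg(0x0000007f, 8) == 0x7f); /* positive i8 unchanged */
    assert(sext_inreg(0x00000080, 8) == -128); /* i8 sign bit set */
    return 0;
}
```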
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index 71d2b6e7c2e..eb509423282 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -69,6 +69,115 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
   ret void
 }
 
+; FUNC-LABEL: @bfe_i32_test_6
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: S_ENDPGM
+define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_7
+; SI-NOT: SHL
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FIXME: The shifts should be 1 BFE
+; FUNC-LABEL: @bfe_i32_test_8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_9
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_10
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_11
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_12
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_13
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = ashr i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = lshr i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @bfe_i32_constant_fold_test_0
 ; SI-NOT: BFE
 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
@@ -296,3 +405,22 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
   store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; XXX - This should really be a single BFE, but the sext_inreg of the
+; extended type i24 is never custom lowered.
+; FUNC-LABEL: @bfe_sext_in_reg_i24
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
+; XSI-NOT: SHL
+; XSI-NOT: SHR
+; XSI: BUFFER_STORE_DWORD [[BFE]],
+define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
+  %shl = shl i32 %bfe, 8
+  %ashr = ashr i32 %shl, 8
+  store i32 %ashr, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 6ed1ad5d2e6..1a62253eeb7 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -161,9 +161,9 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
 
 ; FUNC-LABEL: @bfe_u32_test_1
 ; SI: BUFFER_LOAD_DWORD
-; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
 ; SI: S_ENDPGM
-; EG: BFE_UINT
+; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
 define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
@@ -220,7 +220,7 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
 
 ; FUNC-LABEL: @bfe_u32_test_6
 ; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 1, 31
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: S_ENDPGM
 define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
@@ -243,8 +243,9 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
 }
 
 ; FUNC-LABEL: @bfe_u32_test_8
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 31, 1
+; SI-NOT: BFE
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: BFE
 ; SI: S_ENDPGM
 define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
@@ -254,6 +255,76 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
+; FUNC-LABEL: @bfe_u32_test_9
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_10
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_11
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_12
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_13
+; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = ashr i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = lshr i32 %x, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @bfe_u32_constant_fold_test_0
 ; SI-NOT: BFE
 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
@@ -457,3 +528,27 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
   store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_17
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
+  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
+  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_18
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
+  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
+  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/sext-in-reg.ll b/llvm/test/CodeGen/R600/sext-in-reg.ll
index 404c9b8b812..1b02e4bf801 100644
--- a/llvm/test/CodeGen/R600/sext-in-reg.ll
+++ b/llvm/test/CodeGen/R600/sext-in-reg.ll
@@ -417,8 +417,8 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
 
 ; This really should be folded into 1
 ; FUNC-LABEL: @bfe_16_bfe_8
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: BFE
 ; SI: S_ENDPGM
 define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
   %load = load i32 addrspace(1)* %ptr, align 4
@@ -430,7 +430,7 @@ define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
 
 ; Make sure there isn't a redundant BFE
 ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
-; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000
+; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
 ; SI-NOT: BFE
 ; SI: S_ENDPGM
 define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -478,3 +478,47 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %
   store i32 %ashr, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: SHR
+; SI-NOT: SHL
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 31
+  %shr = ashr i32 %shl, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: SHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 30
+  %shr = ashr i32 %shl, 30
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
+; SI: BUFFER_LOAD_DWORD
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: S_ENDPGM
+define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 30
+  %shr = ashr i32 %shl, 30
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/udivrem64.ll b/llvm/test/CodeGen/R600/udivrem64.ll
index b3caebf123d..a71315a12d8 100644
--- a/llvm/test/CodeGen/R600/udivrem64.ll
+++ b/llvm/test/CodeGen/R600/udivrem64.ll
@@ -3,8 +3,7 @@
 ;FUNC-LABEL: @test_udiv
 ;EG: RECIP_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
+;EG: LSHL {{.*}}, 1,
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
@@ -74,8 +73,7 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
+;EG: AND_INT {{.*}}, 1,
 ;SI: S_ENDPGM
 define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
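
As a quick sanity check on the new constant-fold expectations above (`0x7f` for `bfe_u32(255, 1, 31)` in test_17 and `0` for `bfe_u32(255, 31, 1)` in test_18), the reference helper from the earlier sketch reproduces both immediates; this is an illustration only, not part of the commit:

```c
#include <assert.h>
#include <stdint.h>

/* Same illustrative reference helper as in the earlier sketch. */
static uint32_t bfe_u32(uint32_t x, unsigned offset, unsigned width) {
    uint32_t mask = (width >= 32) ? 0xffffffffu : ((1u << width) - 1u);
    return (x >> offset) & mask;
}

int main(void) {
    assert(bfe_u32(255u, 1, 31) == 0x7fu); /* @bfe_u32_constant_fold_test_17 */
    assert(bfe_u32(255u, 31, 1) == 0u);    /* @bfe_u32_constant_fold_test_18 */
    return 0;
}
```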