diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 86 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/bfe-patterns.ll | 163 |
4 files changed, 215 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index e76891c3bed..d0c62877524 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -72,6 +72,40 @@ def u8imm : Operand<i8> {
 def brtarget : Operand<OtherVT>;
 
 //===----------------------------------------------------------------------===//
+// Misc. PatFrags
+//===----------------------------------------------------------------------===//
+
+class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
+  (ops node:$src0, node:$src1),
+  (op $src0, $src1),
+  [{ return N->hasOneUse(); }]
+>;
+
+class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
+  (ops node:$src0, node:$src1, node:$src2),
+  (op $src0, $src1, $src2),
+  [{ return N->hasOneUse(); }]
+>;
+
+
+let Properties = [SDNPCommutative, SDNPAssociative] in {
+def smax_oneuse : HasOneUseBinOp<smax>;
+def smin_oneuse : HasOneUseBinOp<smin>;
+def umax_oneuse : HasOneUseBinOp<umax>;
+def umin_oneuse : HasOneUseBinOp<umin>;
+def fminnum_oneuse : HasOneUseBinOp<fminnum>;
+def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+def and_oneuse : HasOneUseBinOp<and>;
+def or_oneuse : HasOneUseBinOp<or>;
+def xor_oneuse : HasOneUseBinOp<xor>;
+} // Properties = [SDNPCommutative, SDNPAssociative]
+
+def sub_oneuse : HasOneUseBinOp<sub>;
+def shl_oneuse : HasOneUseBinOp<shl>;
+
+def select_oneuse : HasOneUseTernaryOp<select>;
+
+//===----------------------------------------------------------------------===//
 // PatLeafs for floating-point comparisons
 //===----------------------------------------------------------------------===//
 
@@ -157,22 +191,6 @@ def COND_NULL : PatLeaf <
 
 
 //===----------------------------------------------------------------------===//
-// Misc. PatFrags
-//===----------------------------------------------------------------------===//
-
-class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
-  (ops node:$src0, node:$src1),
-  (op $src0, $src1),
-  [{ return N->hasOneUse(); }]
->;
-
-class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
-  (ops node:$src0, node:$src1, node:$src2),
-  (op $src0, $src1, $src2),
-  [{ return N->hasOneUse(); }]
->;
-
-//===----------------------------------------------------------------------===//
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
@@ -608,10 +626,22 @@ def IMMPopCount : SDNodeXForm<imm, [{
                                    MVT::i32);
 }]>;
 
-class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
-  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
-  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
->;
+multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
+  def : Pat <
+    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
+  >;
+
+  def : Pat <
+    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+    (UBFE $src, (i32 0), $width)
+  >;
+
+  def : Pat <
+    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+    (SBFE $src, (i32 0), $width)
+  >;
+}
 
 // rotr pattern
 class ROTRPattern <Instruction BIT_ALIGN> : Pat <
@@ -630,22 +660,6 @@ class IntMed3Pat<Instruction med3Inst,
   (med3Inst $src0, $src1, $src2)
 >;
 
-let Properties = [SDNPCommutative, SDNPAssociative] in {
-def smax_oneuse : HasOneUseBinOp<smax>;
-def smin_oneuse : HasOneUseBinOp<smin>;
-def umax_oneuse : HasOneUseBinOp<umax>;
-def umin_oneuse : HasOneUseBinOp<umin>;
-def fminnum_oneuse : HasOneUseBinOp<fminnum>;
-def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
-def and_oneuse : HasOneUseBinOp<and>;
-def or_oneuse : HasOneUseBinOp<or>;
-def xor_oneuse : HasOneUseBinOp<xor>;
-} // Properties = [SDNPCommutative, SDNPAssociative]
-
-def sub_oneuse : HasOneUseBinOp<sub>;
-
-def select_oneuse : HasOneUseTernaryOp<select>;
-
 // Special conversion patterns
 
 def cvt_rpi_i32_f32 : PatFrag <
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 10d32482a60..f7296b487be 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -388,7 +388,7 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
   VecALU
 >;
 
-def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
 
 def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
   [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1a40bd72ad3..e496496b3c5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1069,8 +1069,7 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
-
-def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
 
 def : Pat<
   (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
new file mode 100644
index 00000000000..e48744f40fb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -0,0 +1,163 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; GCN-LABEL: {{^}}v_ubfe_sub_i32:
+; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
+; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
+define void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
+  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %src = load volatile i32, i32 addrspace(1)* %in0.gep
+  %width = load volatile i32, i32 addrspace(1)* %in1.gep
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = lshr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
+; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
+
+; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
+; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
+
+; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
+; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
+
+; GCN: [[BFE]]
+; GCN: [[SHL]]
+define void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
+  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %src = load volatile i32, i32 addrspace(1)* %in0.gep
+  %width = load volatile i32, i32 addrspace(1)* %in1.gep
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = lshr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  store volatile i32 %shl, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_ubfe_sub_i32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: s_load_dword [[WIDTH:s[0-9]+]]
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
+; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
+define void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = lshr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: s_load_dword [[WIDTH:s[0-9]+]]
+; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
+; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
+; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
+define void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = lshr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  store volatile i32 %shl, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sbfe_sub_i32:
+; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
+; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
+define void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
+  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %src = load volatile i32, i32 addrspace(1)* %in0.gep
+  %width = load volatile i32, i32 addrspace(1)* %in1.gep
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = ashr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
+; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
+
+; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
+; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
+
+; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
+; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
+
+; GCN: [[BFE]]
+; GCN: [[SHL]]
+define void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
+  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %src = load volatile i32, i32 addrspace(1)* %in0.gep
+  %width = load volatile i32, i32 addrspace(1)* %in1.gep
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = ashr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  store volatile i32 %shl, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_sbfe_sub_i32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: s_load_dword [[WIDTH:s[0-9]+]]
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
+; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
+define void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = ashr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: s_load_dword [[WIDTH:s[0-9]+]]
+; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
+; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
+; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
+define void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+  %sub = sub i32 32, %width
+  %shl = shl i32 %src, %sub
+  %bfe = ashr i32 %shl, %sub
+  store i32 %bfe, i32 addrspace(1)* %out.gep
+  store volatile i32 %shl, i32 addrspace(1)* undef
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }

