diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-18 20:00:24 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-18 20:00:24 +0000 |
| commit | bb1c8b6f5caeb985dc965bd4a6e96be80f777733 (patch) | |
| tree | 19bc557f7c26139eaa3deec3ac9438eaf48cb516 /llvm/lib/Target | |
| parent | acc93d62e04844a91265ec3973d80199653098d8 (diff) | |
| download | bcm5719-llvm-bb1c8b6f5caeb985dc965bd4a6e96be80f777733.tar.gz bcm5719-llvm-bb1c8b6f5caeb985dc965bd4a6e96be80f777733.zip | |
[AMDGPU] gfx10 wave32 patterns
Differential Revision: https://reviews.llvm.org/D63511
llvm-svn: 363729
Diffstat (limited to 'llvm/lib/Target')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 73 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 10 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 18 |
3 files changed, 86 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2c68fdf2db3..d8738a8b119 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -167,7 +167,6 @@ def S_ADD_U64_CO_PSEUDO : SPseudoInstSI < def S_SUB_U64_CO_PSEUDO : SPseudoInstSI < (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) >; - } // End usesCustomInserter = 1, Defs = [SCC] let usesCustomInserter = 1 in { @@ -563,7 +562,16 @@ def : GCNPat < def : GCNPat < (AMDGPUinit_exec i64:$src), (SI_INIT_EXEC (as_i64imm $src)) ->; +> { + let WaveSizePredicate = isWave64; +} + +def : GCNPat < + (AMDGPUinit_exec i64:$src), + (SI_INIT_EXEC_LO (as_i32imm $src)) +> { + let WaveSizePredicate = isWave32; +} def : GCNPat < (AMDGPUinit_exec_from_input i32:$input, i32:$shift), @@ -1170,7 +1178,16 @@ def : GCNPat < def : GCNPat < (i1 imm:$imm), (S_MOV_B64 (i64 (as_i64imm $imm))) ->; +> { + let WaveSizePredicate = isWave64; +} + +def : GCNPat < + (i1 imm:$imm), + (S_MOV_B32 (i32 (as_i32imm $imm))) +> { + let WaveSizePredicate = isWave32; +} def : GCNPat < (f64 InlineFPImm<f64>:$imm), @@ -1361,10 +1378,12 @@ def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>; // If we need to perform a logical operation on i1 values, we need to // use vector comparisons since there is only one SCC register. Vector -// comparisons still write to a pair of SGPRs, so treat these as -// 64-bit comparisons. When legalizing SGPR copies, instructions -// resulting in the copies from SCC to these instructions will be -// moved to the VALU. +// comparisons may write to a pair of SGPRs or a single SGPR, so treat +// these as 32 or 64-bit comparisons. When legalizing SGPR copies, +// instructions resulting in the copies from SCC to these instructions +// will be moved to the VALU. 
+ +let WaveSizePredicate = isWave64 in { def : GCNPat < (i1 (and i1:$src0, i1:$src1)), (S_AND_B64 $src0, $src1) @@ -1401,6 +1420,46 @@ def : GCNPat < (S_NOT_B64 $src0) >; } +} // end isWave64 + +let WaveSizePredicate = isWave32 in { +def : GCNPat < + (i1 (and i1:$src0, i1:$src1)), + (S_AND_B32 $src0, $src1) +>; + +def : GCNPat < + (i1 (or i1:$src0, i1:$src1)), + (S_OR_B32 $src0, $src1) +>; + +def : GCNPat < + (i1 (xor i1:$src0, i1:$src1)), + (S_XOR_B32 $src0, $src1) +>; + +def : GCNPat < + (i1 (add i1:$src0, i1:$src1)), + (S_XOR_B32 $src0, $src1) +>; + +def : GCNPat < + (i1 (sub i1:$src0, i1:$src1)), + (S_XOR_B32 $src0, $src1) +>; + +let AddedComplexity = 1 in { +def : GCNPat < + (i1 (add i1:$src0, (i1 -1))), + (S_NOT_B32 $src0) +>; + +def : GCNPat < + (i1 (sub i1:$src0, (i1 -1))), + (S_NOT_B32 $src0) +>; +} +} // end isWave32 def : GCNPat < (f16 (sint_to_fp i1:$src)), diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index aab7f79ffc9..8df3c313616 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -454,7 +454,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16 // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. 
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, @@ -462,23 +462,23 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 let AllocationPriority = 8; } -def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 8; } -def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 8; } // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 8; } -def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 4ca217ffc54..d7bb869377a 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -152,12 +152,24 @@ let Defs = [SCC] in { [(set i64:$sdst, (not i64:$src0))] >; def S_WQM_B32 : 
SOP1_32 <"s_wqm_b32">; - def S_WQM_B64 : SOP1_64 <"s_wqm_b64", - [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))] - >; + def S_WQM_B64 : SOP1_64 <"s_wqm_b64">; } // End Defs = [SCC] +let WaveSizePredicate = isWave32 in { +def : GCNPat < + (int_amdgcn_wqm_vote i1:$src0), + (S_WQM_B32 $src0) +>; +} + +let WaveSizePredicate = isWave64 in { +def : GCNPat < + (int_amdgcn_wqm_vote i1:$src0), + (S_WQM_B64 $src0) +>; +} + def S_BREV_B32 : SOP1_32 <"s_brev_b32", [(set i32:$sdst, (bitreverse i32:$src0))] >; |

