diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-03-30 16:19:13 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-03-30 16:19:13 +0000 |
commit | 74e2974ac6af988922c7bd2edf3c70531c03c798 (patch) | |
tree | 0220bb5373b541614d821e78d4e10255cd964d94 /llvm/lib/Target/AMDGPU | |
parent | 0961218c109feac256c612f79a3d7c1249aef0f3 (diff) | |
download | bcm5719-llvm-74e2974ac6af988922c7bd2edf3c70531c03c798.tar.gz bcm5719-llvm-74e2974ac6af988922c7bd2edf3c70531c03c798.zip |
[AMDGPU] Fixed some instructions latencies
Differential Revision: https://reviews.llvm.org/D45073
llvm-svn: 328874
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 11 |
2 files changed, 13 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f793fe6b249..38bca6ca35b 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -193,13 +193,14 @@ defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; -defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; let SchedRW = [WriteQuarterRate32] in { +defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>; defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; +defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; } // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -207,8 +208,6 @@ defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; } // End SchedRW = [WriteDouble]; -defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; - let SchedRW = [WriteDouble] in { defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>; } // End SchedRW = [WriteDouble] @@ -223,9 +222,9 @@ defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>; defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; -defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; let SchedRW = [WriteDoubleAdd] in { +defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>; defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>; } // End SchedRW = [WriteDoubleAdd] @@ -289,9 +288,7 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>; defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>; } // End Uses = [M0, EXEC] -let SchedRW = [WriteQuarterRate32] in { defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; -} // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -335,11 +332,15 @@ defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; +let SchedRW = [WriteQuarterRate32] in { defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>; defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>; defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; +defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; +defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; +} // End SchedRW = [WriteQuarterRate32] defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>; defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; @@ -347,8 +348,6 @@ defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>; defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>; defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>; defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; -defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; -defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8c1a83b5d50..922187c1d12 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -283,10 +283,10 @@ def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>; def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>; -def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>; let SchedRW = [WriteDoubleAdd] in { +def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>; def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>; def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>; def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>; @@ -374,6 +374,7 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3 let SchedRW = [WriteDouble]; } +let SchedRW = [Write64Bit] in { // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>; @@ -387,17 +388,17 @@ def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; } // End SubtargetPredicate = isVI - +} // End SchedRW = [Write64Bit] let SubtargetPredicate = isCIVI in { -let Constraints = "@earlyclobber $vdst" in { +let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP>>; -} // End Constraints = "@earlyclobber $vdst" +} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] let isCommutable = 1 in { -let SchedRW = [WriteDouble, WriteSALU] in { +let SchedRW = [WriteQuarterRate32, WriteSALU] in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SchedRW = [WriteDouble, WriteSALU] |