Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstructions.td')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td | 105
1 file changed, 55 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4122eb915f3..b758a576047 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -374,7 +374,7 @@ def : Pat<
 def : Pat <
   (int_AMDGPU_kilp),
-  (SI_KILL 0xbf800000)
+  (SI_KILL (i32 0xbf800000))
 >;
 
 def : Pat <
@@ -555,7 +555,7 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
 def : Pat <
   (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
                (f32 FP_ZERO), (f32 FP_ONE)),
-  (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
+  (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
 >;
 
 /********** ================================ **********/
@@ -566,7 +566,7 @@ def : Pat <
 def : Pat <
   (fneg (fabs f32:$src)),
-  (S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
+  (S_OR_B32 $src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
 >;
 
 // FIXME: Should use S_OR_B32
@@ -575,19 +575,19 @@ def : Pat <
   (REG_SEQUENCE VReg_64,
     (i32 (EXTRACT_SUBREG f64:$src, sub0)),
     sub0,
-    (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                  (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+    (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+                  (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
     sub1)
 >;
 
 def : Pat <
   (fabs f32:$src),
-  (V_AND_B32_e64 $src, (V_MOV_B32_e32 0x7fffffff))
+  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))
 >;
 
 def : Pat <
   (fneg f32:$src),
-  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
 >;
 
 def : Pat <
@@ -595,8 +595,8 @@ def : Pat <
   (REG_SEQUENCE VReg_64,
     (i32 (EXTRACT_SUBREG f64:$src, sub0)),
     sub0,
-    (V_AND_B32_e64 (EXTRACT_SUBREG f64:$src, sub1),
-                   (V_MOV_B32_e32 0x7fffffff)), // Set sign bit.
+    (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+                   (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
     sub1)
 >;
 
@@ -605,8 +605,8 @@ def : Pat <
   (REG_SEQUENCE VReg_64,
     (i32 (EXTRACT_SUBREG f64:$src, sub0)),
     sub0,
-    (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                   (V_MOV_B32_e32 0x80000000)),
+    (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+                   (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
     sub1)
 >;
 
@@ -666,21 +666,21 @@ def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 def : Pat <
   (int_AMDGPU_cube v4f32:$src),
   (REG_SEQUENCE VReg_128,
-    (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
-                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+    (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
+                  0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
+                  0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
                   0 /* clamp */, 0 /* omod */), sub0,
-    (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
-                  0 /* src2_modifiers */,(EXTRACT_SUBREG $src, sub2),
+    (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
+                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+                  0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
                   0 /* clamp */, 0 /* omod */), sub1,
-    (V_CUBEMA_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
-                  0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
+    (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
+                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
                   0 /* clamp */, 0 /* omod */), sub2,
-    (V_CUBEID_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
-                  0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
+    (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
+                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
                   0 /* clamp */, 0 /* omod */), sub3)
 >;
 
@@ -701,7 +701,7 @@ def : Ext32Pat <anyext>;
 def : Pat <
   (AMDGPUurecip i32:$src0),
   (V_CVT_U32_F32_e32
-    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+    (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
                    (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
 >;
 
@@ -767,32 +767,37 @@ def : Pat <
 //===----------------------------------------------------------------------===//
 
 def : Pat<(i32 (sext_inreg i32:$src, i1)),
-          (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
+          (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16
 
 // Handle sext_inreg in i64
 def : Pat <
   (i64 (sext_inreg i64:$src, i1)),
-  (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
+  (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
+>;
+
+def : Pat <
+  (i16 (sext_inreg i16:$src, i8)),
+  (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
 >;
 
 def : Pat <
   (i64 (sext_inreg i64:$src, i8)),
-  (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
+  (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16
 >;
 
 def : Pat <
   (i64 (sext_inreg i64:$src, i16)),
-  (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
+  (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16
 >;
 
 def : Pat <
   (i64 (sext_inreg i64:$src, i32)),
-  (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
+  (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
 >;
 
 def : Pat <
   (i64 (zext i32:$src)),
-  (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
+  (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
 >;
 
 def : Pat <
@@ -804,7 +809,7 @@ class ZExt_i64_i1_Pat <SDNode ext> : Pat <
   (i64 (ext i1:$src)),
   (REG_SEQUENCE VReg_64,
     (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
-    (S_MOV_B32 0), sub1)
+    (S_MOV_B32 (i32 0)), sub1)
 >;
 
@@ -816,25 +821,25 @@ def : ZExt_i64_i1_Pat<anyext>;
 def : Pat <
   (i64 (sext i32:$src)),
   (REG_SEQUENCE SReg_64, $src, sub0,
-    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
+    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1)
 >;
 
 def : Pat <
   (i64 (sext i1:$src)),
   (REG_SEQUENCE VReg_64,
-    (V_CNDMASK_B32_e64 0, -1, $src), sub0,
-    (V_CNDMASK_B32_e64 0, -1, $src), sub1)
+    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
+    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
 >;
 
-class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
+class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
   (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
-  (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
+  (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
 >;
 
-def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
-def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
-def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
-def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
 
 // If we need to perform a logical operation on i1 values, we need to
 // use vector comparisons since there is only one SCC register. Vector
@@ -859,12 +864,12 @@ def : Pat <
 
 def : Pat <
   (f32 (sint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
 >;
 
 def : Pat <
   (f32 (uint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
 >;
 
 def : Pat <
@@ -888,20 +893,20 @@ def : Pat <
 
 def : Pat <
   (i1 (trunc i32:$a)),
-  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), 1)
+  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
 >;
 
 def : Pat <
   (i1 (trunc i64:$a)),
   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
-                    (EXTRACT_SUBREG $a, sub0)), 1)
+                    (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
 >;
 
 def : Pat <
   (i32 (bswap i32:$a)),
-  (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
-             (V_ALIGNBIT_B32 $a, $a, 24),
-             (V_ALIGNBIT_B32 $a, $a, 8))
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
+             (V_ALIGNBIT_B32 $a, $a, (i32 24)),
+             (V_ALIGNBIT_B32 $a, $a, (i32 8)))
 >;
 
 def : Pat <
@@ -917,7 +922,7 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 
   def : Pat <
     (vt (add (vt (shl 1, vt:$a)), -1)),
-    (BFM $a, (MOV 0))
+    (BFM $a, (MOV (i32 0)))
   >;
 }
 
@@ -928,7 +933,7 @@ def : BFEPattern <V_BFE_U32, S_MOV_B32>;
 
 def : Pat<
   (fcanonicalize f32:$src),
-  (V_MUL_F32_e64 0, CONST.FP32_ONE, 0, $src, 0, 0)
+  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
 >;
 
 def : Pat<
@@ -963,7 +968,7 @@ def : Pat <
         (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
         DSTCLAMP.NONE, DSTOMOD.NONE),
       $x,
-      (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+      (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))),
     DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
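For context on the idiom this commit applies throughout: in a TableGen pattern, a bare literal such as 0x80000000 is an untyped integer, so the pattern importer must infer its value type from the instruction's operand definitions, which can be ambiguous. Wrapping the literal in an explicit cast such as (i32 0x80000000) pins the operand type. A minimal sketch of the before/after shape, using a hypothetical shift pattern that is not taken from this file:

// Hypothetical illustration only, not part of the commit.
// Before: the shift-amount literal's type is left to inference.
//   def : Pat <
//     (srl i32:$src, (i32 16)),
//     (S_LSHR_B32 $src, (S_MOV_B32 16))
//   >;
// After: the literal is explicitly typed as i32.
def : Pat <
  (srl i32:$src, (i32 16)),
  (S_LSHR_B32 $src, (S_MOV_B32 (i32 16)))
>;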

