diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-13 00:22:09 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-13 00:22:09 +0000 |
| commit | e1cd482fda78c18d4242f7e834686d2f38c86854 (patch) | |
| tree | 5288ecf0096bffcd32619e28922529bb826f7773 /llvm/lib/Target | |
| parent | 70b92820158781ae42d89568b15873e74871e59f (diff) | |
| download | bcm5719-llvm-e1cd482fda78c18d4242f7e834686d2f38c86854.tar.gz bcm5719-llvm-e1cd482fda78c18d4242f7e834686d2f38c86854.zip | |
AMDGPU: Select d16 loads into low component of register
llvm-svn: 318005
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 40 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 35 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 3 |
6 files changed, 147 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index d2562615735..86a0dab30ea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -201,6 +201,8 @@ private: bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectHi16Elt(SDValue In, SDValue &Src) const; + void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); @@ -1134,8 +1136,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { unsigned Imm = CAddr->getZExtValue(); - assert(!SIInstrInfo::isLegalMUBUFImmOffset(Imm) && - "should have been selected by other pattern"); SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, @@ -2024,6 +2024,35 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, return true; } +// TODO: Can we identify things like v_mad_mixhi_f16? 
+bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
+  if (In.isUndef()) {
+    Src = In;
+    return true;
+  }
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
+    SDLoc SL(In);
+    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
+    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                                 SL, MVT::i32, K);
+    Src = SDValue(MovK, 0);
+    return true;
+  }
+
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
+    SDLoc SL(In);
+    SDValue K = CurDAG->getTargetConstant(
+      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
+    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                                 SL, MVT::i32, K);
+    Src = SDValue(MovK, 0);
+    return true;
+  }
+
+  return isExtractHiElt(In, Src);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6498aafc6ac..c14679701c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -133,6 +133,29 @@ def shl_oneuse : HasOneUseBinOp<shl>;
 
 def select_oneuse : HasOneUseTernaryOp<select>;
 
+def srl_16 : PatFrag<
+  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
+>;
+
+
+def hi_i16_elt : PatFrag<
+  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
+>;
+
+
+def hi_f16_elt : PatLeaf<
+  (vt), [{
+  if (N->getOpcode() != ISD::BITCAST)
+    return false;
+  SDValue Tmp = N->getOperand(0);
+
+  if (Tmp.getOpcode() != ISD::SRL)
+    return false;
+  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
+    return RHS->getZExtValue() == 16;
+  return false;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // PatLeafs for floating-point comparisons
 //===----------------------------------------------------------------------===//
diff --git 
a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index dc4257637a7..2230457b3a9 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -811,7 +811,7 @@ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", let SubtargetPredicate = HasD16LoadStore in { defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_ubyte_d16", VGPR_32, i32 + "buffer_load_ubyte_d16", VGPR_32, i32, null_frag, 1 >; defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads < @@ -819,7 +819,7 @@ defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads < >; defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_sbyte_d16", VGPR_32, i32 + "buffer_load_sbyte_d16", VGPR_32, i32, null_frag, 1 >; defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads < @@ -827,7 +827,7 @@ defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads < >; defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads < - "buffer_load_short_d16", VGPR_32, i32 + "buffer_load_short_d16", VGPR_32, i32, null_frag, 1 >; defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads < @@ -1169,6 +1169,36 @@ multiclass MUBUFScratchLoadPat_Hi16 <MUBUF_Pseudo InstrOffen, >; } +multiclass MUBUFScratchLoadPat_Lo16 <MUBUF_Pseudo InstrOffen, + MUBUF_Pseudo InstrOffset, + ValueType vt, PatFrag ld> { + def : GCNPat < + (build_vector (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))), + (vt (Hi16Elt vt:$hi))), + (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi)) + >; + + def : GCNPat < + (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))))), + (f16 (Hi16Elt f16:$hi))), + (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi)) + >; + + def : GCNPat < + (build_vector (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), + (vt (Hi16Elt vt:$hi))), + (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi)) + >; + + def : 
GCNPat < + (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))))), + (f16 (Hi16Elt f16:$hi))), + (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi)) + >; +} + defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, az_extloadi8_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>; @@ -1184,6 +1214,10 @@ let OtherPredicates = [HasD16LoadStore] in { defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, i16, load_private>; defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, i16, az_extloadi8_private>; defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, i16, sextloadi8_private>; + +defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, i16, load_private>; +defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>; +defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>; } // BUFFER_LOAD_DWORD*, addr64=0 diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 15260d0bae1..4fc7e92721c 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -559,6 +559,19 @@ multiclass DSReadPat_Hi16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> { >; } +multiclass DSReadPat_Lo16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> { + def : GCNPat < + (build_vector (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), (vt (Hi16Elt vt:$hi))), + (v2i16 (inst $ptr, (as_i16imm $offset), 0, $hi)) + >; + + def : GCNPat < + (build_vector (f16 (bitconvert (vt (frag 
(DS1Addr1Offset i32:$ptr, i32:$offset))))), (f16 (Hi16Elt f16:$hi))), + (v2f16 (inst $ptr, (as_i16imm $offset), 0, $hi)) + >; +} + + def : DSReadPat <DS_READ_I8, i32, sextloadi8_local_m0>; def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local_m0>; def : DSReadPat <DS_READ_I8, i16, sextloadi8_local_m0>; @@ -587,6 +600,11 @@ let AddedComplexity = 100 in { defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>; defm : DSReadPat_Hi16<DS_READ_U8_D16_HI, az_extloadi8_local>; defm : DSReadPat_Hi16<DS_READ_I8_D16_HI, sextloadi8_local>; + +defm : DSReadPat_Lo16<DS_READ_U16_D16, load_local>; +defm : DSReadPat_Lo16<DS_READ_U8_D16, az_extloadi8_local>; +defm : DSReadPat_Lo16<DS_READ_I8_D16, sextloadi8_local>; + } } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index af0147f69ef..6413d6d714c 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -654,6 +654,30 @@ multiclass FlatSignedLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, Val >; } +multiclass FlatLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> { + def : GCNPat < + (build_vector (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))), + (v2i16 (inst $vaddr, $offset, 0, $slc, $hi)) + >; + + def : GCNPat < + (build_vector (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))), + (v2f16 (inst $vaddr, $offset, 0, $slc, $hi)) + >; +} + +multiclass FlatSignedLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> { + def : GCNPat < + (build_vector (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))), + (v2i16 (inst $vaddr, $offset, 0, $slc, $hi)) + >; + + def : GCNPat < + (build_vector (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))), + (v2f16 (inst $vaddr, $offset, 0, $slc, $hi)) + >; +} + class 
FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) @@ -765,6 +789,12 @@ defm : FlatLoadPat_Hi16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_flat>; defm : FlatLoadPat_Hi16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_flat>; defm : FlatLoadPat_Hi16 <FLAT_LOAD_SHORT_D16_HI, load_flat>; } + +let AddedComplexity = 9 in { +defm : FlatLoadPat_Lo16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_flat>; +defm : FlatLoadPat_Lo16 <FLAT_LOAD_SBYTE_D16, sextloadi8_flat>; +defm : FlatLoadPat_Lo16 <FLAT_LOAD_SHORT_D16, load_flat>; +} } } // End OtherPredicates = [HasFlatAddressSpace] @@ -801,6 +831,11 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i3 defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_global>; defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_global>; defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SHORT_D16_HI, load_global>; + +defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_global>; +defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_global>; +defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SHORT_D16, load_global>; + } def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index aad965dc6ea..1a236659644 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -843,6 +843,9 @@ def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">; def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">; + +def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// |

