diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-05 15:59:07 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-05 15:59:07 +0000 |
| commit | 392239296993090fd49d7a665e933ffc52e2068a (patch) | |
| tree | ed1fc454c4ac29d50ec4a980feec9855d149dfa5 /llvm/lib | |
| parent | d884fbde2a829ef8c78d1a0b0f2478e7e672bc0a (diff) | |
| download | bcm5719-llvm-392239296993090fd49d7a665e933ffc52e2068a.tar.gz bcm5719-llvm-392239296993090fd49d7a665e933ffc52e2068a.zip | |
AMDGPU: Correct behavior of f16 buffer loads
Don't assume format loads for f16. Also fixes support for targets
without i16.
llvm-svn: 367879
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 99 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 |
5 files changed, 72 insertions, 45 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e..8029b8968b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2002,6 +2002,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 504d391ad45..e3b55c2223e 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1182,8 +1182,12 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i16, "BUFFER_LOAD_DWORD">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f16, "BUFFER_LOAD_DWORD">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i16, "BUFFER_LOAD_DWORDX2">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f16, "BUFFER_LOAD_DWORDX2">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3f32, "BUFFER_LOAD_DWORDX3">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3i32, "BUFFER_LOAD_DWORDX3">; defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 65568667b4d..f6d80eb4aa1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -675,9 +675,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); @@ -4119,6 +4122,41 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode, return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL); } +SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, + SelectionDAG &DAG, + ArrayRef<SDValue> Ops) const { + SDLoc DL(M); + EVT LoadVT = M->getValueType(0); + EVT EltType = LoadVT.getScalarType(); + EVT IntVT = LoadVT.changeTypeToInteger(); + + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); + + unsigned Opc = + IsFormat ? AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD; + + if (IsD16) { + return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops); + } + + // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics + if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32) + return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); + + if (isTypeLegal(LoadVT)) { + return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT, + M->getMemOperand(), DAG); + } + + EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT); + SDVTList VTList = DAG.getVTList(CastVT, MVT::Other); + SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT, + M->getMemOperand(), DAG); + return DAG.getMergeValues( + {DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)}, + DL); +} + static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -4245,8 +4283,14 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_W_CHAIN: { if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) { - Results.push_back(Res); - Results.push_back(Res.getValue(1)); + if (Res.getOpcode() == ISD::MERGE_VALUES) { + // FIXME: Hacky + Results.push_back(Res.getOperand(0)); + Results.push_back(Res.getOperand(1)); + } else { + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } return; } @@ -6194,6 +6238,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6206,28 +6252,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getConstant(0, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ? - AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); - auto *M = cast<MemSDNode>(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6240,25 +6270,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getConstant(1, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ? - AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); - auto *M = cast<MemSDNode>(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_tbuffer_load: { MemSDNode *M = cast<MemSDNode>(Op); @@ -7123,9 +7135,10 @@ SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG, SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList, Ops, IntVT, M->getMemOperand()); - SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL, - LoadVT.getScalarType(), BufferLoad); - return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL); + SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad); + LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal); + + return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL); } // Handle 8 bit and 16 bit buffer stores diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 410460226a2..4e98b2aab3a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -94,6 +94,9 @@ private: SelectionDAG &DAG, ArrayRef<SDValue> Ops, bool IsIntrinsic = false) const; + SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG, + ArrayRef<SDValue> Ops) const; + // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to // dwordx4 if on SI. SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7f287b41416..cce131b9bcd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -919,9 +919,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); } - if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){ + if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); } if (!Subtarget->hasFP16()) |

