diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-08 20:14:46 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-08 20:14:46 +0000 |
commit | e96d03745d55c18b9afa7f581de06297bf1153b0 (patch) | |
tree | 5e8e8c91da3915e73ab09bbcbce9bab71c66b4e1 /llvm/lib/Target | |
parent | 73b54f413430ec69547b7bf25e2bce7a9ab1d0c9 (diff) | |
download | bcm5719-llvm-e96d03745d55c18b9afa7f581de06297bf1153b0.tar.gz bcm5719-llvm-e96d03745d55c18b9afa7f581de06297bf1153b0.zip |
AMDGPU: Make f16 ConstantFP legal
Because f16 ConstantFP was not legal, DAG combines failed to apply,
so simple folds were missed (for example, bitcasts of constants
were not being folded away).
The only reason I'm leaving the v_mov_b32 hack that f32
already uses is to avoid madak formation test regressions.
PeepholeOptimizer has an ordering issue: it attempts to fold the
immediate into the sgpr->vgpr copy instead of into the actual
use. Running it twice avoids that problem.
llvm-svn: 289096
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 13 |
3 files changed, 14 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a0184bfefd0..eeab4821e50 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -277,7 +277,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); // F16 - Constant Actions. - setOperationAction(ISD::ConstantFP, MVT::f16, Custom); + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); // F16 - Load/Store Actions. setOperationAction(ISD::LOAD, MVT::f16, Promote); @@ -1848,9 +1848,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG); case ISD::TRAP: return lowerTRAP(Op, DAG); - - case ISD::ConstantFP: - return lowerConstantFP(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); } @@ -2055,15 +2052,6 @@ SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG, DAG.getNode(ISD::FTRUNC, DL, VT, Op); } -SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { - if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) { - return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(), - SDLoc(Op), MVT::i32); - } - - return SDValue(); -} - SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f16 && "Do not know how to custom lower FP_ROUND for non-f16 type"); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 56d6ef2a0c1..cb6d5364793 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -53,9 +53,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { const SDLoc &DL, EVT VT) const; - /// \brief Custom lowering for ISD::ConstantFP. 
- SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; - /// \brief Custom lowering for ISD::FP_ROUND for MVT::f16. SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7ca50968096..93e7bcd02a1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -706,12 +706,25 @@ def : Pat < (S_MOV_B32 imm:$imm) >; +// FIXME: Workaround for ordering issue with peephole optimizer where +// a register class copy interferes with immediate folding. Should +// use s_mov_b32, which can be shrunk to s_movk_i32 +def : Pat < + (VGPRImm<(f16 fpimm)>:$imm), + (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) +>; + def : Pat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) >; def : Pat < + (f16 fpimm:$imm), + (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) +>; + +def : Pat < (i32 frameindex:$fi), (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi))) >; |