diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-11 17:02:06 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-11 17:02:06 +0000 |
commit | 5319b0add5b4fd81aa17b54e755b5d7ead11d83f (patch) | |
tree | 4fb16c0c6dc5811c796a1b0f71ef13bea60cc053 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | |
parent | de5fbe9c60d88308e5eabfe0360ba66863c09768 (diff) | |
download | bcm5719-llvm-5319b0add5b4fd81aa17b54e755b5d7ead11d83f.tar.gz bcm5719-llvm-5319b0add5b4fd81aa17b54e755b5d7ead11d83f.zip |
AMDGPU: Fix ctlz combine for sub 32-bit types
llvm-svn: 257353
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 30 |
1 files changed, 24 insertions, 6 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fafe58d65b1..8f63fd61571 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2521,6 +2521,27 @@ static bool isCtlzOpc(unsigned Opc) {
   return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
 }
 
+// Get FFBH node if the incoming op may have been type legalized from a smaller
+// type VT.
+// Need to match pre-legalized type because the generic legalization inserts the
+// add/sub between the select and compare.
+static SDValue getFFBH_U32(const TargetLowering &TLI,
+                           SelectionDAG &DAG, SDLoc SL, SDValue Op) {
+  EVT VT = Op.getValueType();
+  EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  if (LegalVT != MVT::i32)
+    return SDValue();
+
+  if (VT != MVT::i32)
+    Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op);
+
+  SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
+  if (VT != MVT::i32)
+    FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH);
+
+  return FFBH;
+}
+
 // The native instructions return -1 on 0 input. Optimize out a select that
 // produces -1 on 0.
 //
@@ -2546,7 +2567,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
       isCtlzOpc(RHS.getOpcode()) &&
       RHS.getOperand(0) == CmpLHS &&
       isNegativeOne(LHS)) {
-    return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
+    return getFFBH_U32(*this, DAG, SL, CmpLHS);
   }
 
   // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
@@ -2554,7 +2575,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
       isCtlzOpc(LHS.getOpcode()) &&
       LHS.getOperand(0) == CmpLHS &&
       isNegativeOne(RHS)) {
-    return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
+    return getFFBH_U32(*this, DAG, SL, CmpLHS);
   }
 
   return SDValue();
@@ -2578,10 +2599,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
     return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
 
   // There's no reason to not do this if the condition has other uses.
-  if (VT == MVT::i32)
-    return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
-
-  return SDValue();
+  return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
 }
 
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
```