summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-11 17:02:06 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-11 17:02:06 +0000
commit: 5319b0add5b4fd81aa17b54e755b5d7ead11d83f (patch)
tree: 4fb16c0c6dc5811c796a1b0f71ef13bea60cc053 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
parent: de5fbe9c60d88308e5eabfe0360ba66863c09768 (diff)
download: bcm5719-llvm-5319b0add5b4fd81aa17b54e755b5d7ead11d83f.tar.gz
bcm5719-llvm-5319b0add5b4fd81aa17b54e755b5d7ead11d83f.zip
AMDGPU: Fix ctlz combine for sub 32-bit types
llvm-svn: 257353
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 30
1 files changed, 24 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fafe58d65b1..8f63fd61571 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2521,6 +2521,27 @@ static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
+// Build an FFBH_U32 node for Op if getTypeToTransformTo() maps its type to i32,
+// else return an empty SDValue. A non-i32 Op is zero-extended to i32 and the
+// result truncated back. Need to match the pre-legalized type because the
+// generic legalization inserts the add/sub between the select and compare.
+static SDValue getFFBH_U32(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDLoc SL, SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (LegalVT != MVT::i32)
+ return SDValue(); // Type does not transform to i32: build no node.
+
+ if (VT != MVT::i32)
+ Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op); // Widen operand to i32.
+
+ SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
+ if (VT != MVT::i32)
+ FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH); // Back to the original type.
+
+ return FFBH;
+}
+
// The native instructions return -1 on 0 input. Optimize out a select that
// produces -1 on 0.
//
@@ -2546,7 +2567,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
isCtlzOpc(RHS.getOpcode()) &&
RHS.getOperand(0) == CmpLHS &&
isNegativeOne(LHS)) {
- return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
}
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
@@ -2554,7 +2575,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
isCtlzOpc(LHS.getOpcode()) &&
LHS.getOperand(0) == CmpLHS &&
isNegativeOne(RHS)) {
- return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
}
return SDValue();
@@ -2578,10 +2599,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
// There's no reason to not do this if the condition has other uses.
- if (VT == MVT::i32)
- return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
-
- return SDValue();
+ return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
OpenPOWER on IntegriCloud