summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-08-12 08:42:54 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-08-12 08:42:54 +0000
commitb5acec1f79deaae964b981a52ce8ed2ce3a01359 (patch)
treea667a04b05b5103e6253c0e8b4c06c163eed9d99 /llvm/lib
parent3ead7d73893dd6544651dc5cf6989abd7fe4f4bb (diff)
downloadbcm5719-llvm-b5acec1f79deaae964b981a52ce8ed2ce3a01359.tar.gz
bcm5719-llvm-b5acec1f79deaae964b981a52ce8ed2ce3a01359.zip
AMDGPU: Use splat vectors for undefs when folding canonicalize
If one of the elements is undef, use the canonicalized constant from the other element instead of 0. Splat vectors are more useful for other optimizations, such as matching vector clamps. This was breaking on clamps of half3 from the undef 4th component. llvm-svn: 339512
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp25
1 files changed, 20 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 226cfeb613b..ba8a3a512cb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6989,27 +6989,42 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
// TODO: This could be better with wider vectors that will be split to v2f16,
// and to consider uses since there aren't that many packed operations.
- if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16) {
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
SDLoc SL(N);
SDValue NewElts[2];
SDValue Lo = N0.getOperand(0);
SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
for (unsigned I = 0; I != 2; ++I) {
SDValue Op = N0.getOperand(I);
- EVT EltVT = Op.getValueType();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
CFP->getValueAPF());
} else if (Op.isUndef()) {
- // This would ordinarily be folded to a qNaN. Since this may be half
- // of a packed operation, it may be cheaper to use a 0.
- NewElts[I] = DAG.getConstantFP(0.0f, SL, EltVT);
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
} else {
NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
}
}
+ // If one half is undef, and one is constant, prefer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+ if (NewElts[0].isUndef()) {
+ if (isa<ConstantFPSDNode>(NewElts[1]))
+ NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+ NewElts[1]: DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
return DAG.getBuildVector(VT, SL, NewElts);
}
}
OpenPOWER on IntegriCloud