summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-08-12 08:42:54 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-08-12 08:42:54 +0000
commitb5acec1f79deaae964b981a52ce8ed2ce3a01359 (patch)
treea667a04b05b5103e6253c0e8b4c06c163eed9d99 /llvm/lib
parent3ead7d73893dd6544651dc5cf6989abd7fe4f4bb (diff)
downloadbcm5719-llvm-b5acec1f79deaae964b981a52ce8ed2ce3a01359.tar.gz
bcm5719-llvm-b5acec1f79deaae964b981a52ce8ed2ce3a01359.zip
AMDGPU: Use splat vectors for undefs when folding canonicalize
If one of the elements is undef, use the canonicalized constant from the other element instead of 0. Splat vectors are more useful for other optimizations, such as matching vector clamps. This was breaking on clamps of half3 from the undef 4th component. llvm-svn: 339512
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp25
1 files changed, 20 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 226cfeb613b..ba8a3a512cb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6989,27 +6989,42 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
// TODO: This could be better with wider vectors that will be split to v2f16,
// and to consider uses since there aren't that many packed operations.
- if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16) {
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
SDLoc SL(N);
SDValue NewElts[2];
SDValue Lo = N0.getOperand(0);
SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
for (unsigned I = 0; I != 2; ++I) {
SDValue Op = N0.getOperand(I);
- EVT EltVT = Op.getValueType();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
CFP->getValueAPF());
} else if (Op.isUndef()) {
- // This would ordinarily be folded to a qNaN. Since this may be half
- // of a packed operation, it may be cheaper to use a 0.
- NewElts[I] = DAG.getConstantFP(0.0f, SL, EltVT);
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
} else {
NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
}
}
+ // If one half is undef, and one is constant, prefer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+ if (NewElts[0].isUndef()) {
+ if (isa<ConstantFPSDNode>(NewElts[1]))
+ NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+ NewElts[1]: DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
return DAG.getBuildVector(VT, SL, NewElts);
}
}
OpenPOWER on IntegriCloud