[Power9] Ensure float128 in non-homogenous aggregates are passed via VSX reg

Non-homogenous aggregates are passed in consecutive GPRs, in GPRs and in memory, or in memory. This patch ensures that float128 members of non-homogenous aggregates are passed via VSX registers. This is done by custom-lowering a bitcast of a build_pair(i64,i64) to float128 into a new PPCISD node, BUILD_FP128. Differential Revision: https://reviews.llvm.org/D48308 llvm-svn: 336310
author: Lei Huang <lei@ca.ibm.com> 2018-07-05 06:21:37 +0000
committer: Lei Huang <lei@ca.ibm.com> 2018-07-05 06:21:37 +0000
commit: a855e17f096d29e766362aa6e96ffe6d0c886ca2 (patch)
tree: 79b12b8c74c0e75613e34d56fe02c14467520a3b /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent: 2161ec7ee2c4bc5e02e9c4de8a3857b37dba0839 (diff)
download: bcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.tar.gz
bcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.zip
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 705e1e07710..18d94ec8a50 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -814,6 +814,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
         setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
         setTruncStoreAction(MVT::f128, MVT::f64, Expand);
         setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
       }
 
     }
@@ -1268,6 +1269,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
+  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
   }
   return nullptr;
 }
@@ -7661,6 +7663,23 @@ static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
   return !(IsSplat && IsLoad);
 }
 
+// Lower BITCAST(f128, (build_pair i64, i64)) to PPCISD::BUILD_FP128.
+SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+  // Op is the BITCAST node being lowered; Op0 is the value it casts from.
+  SDLoc dl(Op);
+  SDValue Op0 = Op->getOperand(0);
+  // Bail out unless quad precision is on and Op is f128 <- pair(i64, i64).
+  if (!EnableQuadPrecision ||
+      (Op.getValueType() != MVT::f128 ) ||
+      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+      (Op0.getOperand(0).getValueType() !=  MVT::i64) ||
+      (Op0.getOperand(1).getValueType() != MVT::i64))
+    return SDValue(); // Empty result: no BUILD_FP128 node is created.
+  // Pass both i64 operands of the pair, in order, to the new f128 node.
+  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
+                     Op0.getOperand(1));
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -9455,6 +9474,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
 
+  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
+
   // Frame & Return address.
   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
author	Lei Huang <lei@ca.ibm.com>	2018-07-05 06:21:37 +0000
committer	Lei Huang <lei@ca.ibm.com>	2018-07-05 06:21:37 +0000
commit	a855e17f096d29e766362aa6e96ffe6d0c886ca2 (patch)
tree	79b12b8c74c0e75613e34d56fe02c14467520a3b /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent	2161ec7ee2c4bc5e02e9c4de8a3857b37dba0839 (diff)
download	bcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.tar.gz bcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.zip