summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
authorLei Huang <lei@ca.ibm.com>2018-07-05 06:21:37 +0000
committerLei Huang <lei@ca.ibm.com>2018-07-05 06:21:37 +0000
commita855e17f096d29e766362aa6e96ffe6d0c886ca2 (patch)
tree79b12b8c74c0e75613e34d56fe02c14467520a3b /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent2161ec7ee2c4bc5e02e9c4de8a3857b37dba0839 (diff)
downloadbcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.tar.gz
bcm5719-llvm-a855e17f096d29e766362aa6e96ffe6d0c886ca2.zip
[Power9] Ensure float128 in non-homogenous aggregates are passed via VSX reg
Non-homogenous aggregates are passed in consecutive GPRs, in GPRs and in memory, or in memory. This patch ensures that float128 members of non-homogenous aggregates are passed via VSX registers. This is done via custom lowering a bitcast of a build_pari(i64,i64) to float128 to a new PPCISD node, BUILD_FP128. Differential Revision: https://reviews.llvm.org/D48308 llvm-svn: 336310
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp21
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 705e1e07710..18d94ec8a50 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -814,6 +814,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
}
}
@@ -1268,6 +1269,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
+ case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
}
return nullptr;
}
@@ -7661,6 +7663,23 @@ static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
return !(IsSplat && IsLoad);
}
+// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
+SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+
+ SDLoc dl(Op);
+ SDValue Op0 = Op->getOperand(0);
+
+ if (!EnableQuadPrecision ||
+ (Op.getValueType() != MVT::f128 ) ||
+ (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+ (Op0.getOperand(0).getValueType() != MVT::i64) ||
+ (Op0.getOperand(1).getValueType() != MVT::i64))
+ return SDValue();
+
+ return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
+ Op0.getOperand(1));
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9455,6 +9474,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
OpenPOWER on IntegriCloud