summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-10-11 20:36:06 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-10-11 20:36:06 +0000
commit35d513c7e4cdd910c581a83186af601a6e419059 (patch)
tree46041b247fc57b84ea4d8a14fb323340fdfd960e /llvm/lib/Target/X86
parentc7497d3ac535bcf4a6fe3756d79a97ed10bb940c (diff)
downloadbcm5719-llvm-35d513c7e4cdd910c581a83186af601a6e419059.tar.gz
bcm5719-llvm-35d513c7e4cdd910c581a83186af601a6e419059.zip
[X86] Type legalize v2f32 loads by using an f64 load and a scalar_to_vector.
On 64-bit targets the generic legalize will use an i64 load and a scalar_to_vector for us. But on 32-bit targets i64 isn't legal and the generic legalizer will end up emitting two 32-bit loads. We have DAG combines that try to put those two loads back together with pretty good success. This patch instead uses f64 to avoid the splitting entirely. I've made it do the same for 64-bit mode for consistency and to keep the load in the fp domain. There are a few things in here that look like regressions in 32-bit mode, but I believe they bring us closer to the 64-bit mode codegen. And that the 64-bit mode code could be better. I think those issues should be looked at separately. Differential Revision: https://reviews.llvm.org/D52528 llvm-svn: 344291
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 24
1 file changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67f98d8ee72..d118e38ae72 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -902,6 +902,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
+ // We want to legalize this to an f64 load rather than an i64 load on
+ // 64-bit targets and two 32-bit loads on a 32-bit target.
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -26420,6 +26424,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
break;
}
+ case ISD::LOAD: {
+ // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
+ // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp cast
+ // since type legalization will try to use an i64 load.
+ EVT VT = N->getValueType(0);
+ assert(VT == MVT::v2f32 && "Unexpected VT");
+ if (!ISD::isNON_EXTLoad(N))
+ return;
+ auto *Ld = cast<LoadSDNode>(N);
+ SDValue Res = DAG.getLoad(MVT::f64, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Res);
+ Res = DAG.getBitcast(MVT::v4f32, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
}
}
OpenPOWER on IntegriCloud