summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2019-08-06 21:43:15 +0000
committer Craig Topper <craig.topper@intel.com> 2019-08-06 21:43:15 +0000
commit ecc1e5d476581ea85777656e17e3c2a61a874566 (patch)
tree 4230a51c55cfecbe66cccf30a548c46e7e414fdc
parent 906e727972d1f4873d006630d6aa6795a7025a1c (diff)
downloadbcm5719-llvm-ecc1e5d476581ea85777656e17e3c2a61a874566.tar.gz
bcm5719-llvm-ecc1e5d476581ea85777656e17e3c2a61a874566.zip
[X86] Don't allow combineSIntToFP to create v2i32 vectors after type legalization.
If we're after type legalization we should only be trying to turn v2i64 into v2i32. So bitcast to v4i32 and shuffle the even elements together, then use X86ISD::CVTSI2P. The alternative is to leave the v2i64 type alone and let it be scalarized. Hopefully keeping it packed is better. Fixes PR42905. llvm-svn: 368091
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 18
-rw-r--r-- llvm/test/CodeGen/X86/pr42905.ll 26
2 files changed, 40 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 43bf7289c16..826ce263aca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42848,6 +42848,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
@@ -42877,13 +42878,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
- EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
+ EVT TruncVT = MVT::i32;
if (InVT.isVector())
TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
InVT.getVectorNumElements());
SDLoc dl(N);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+ if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+ }
+ // If we're after legalize and the type is v2i32 we need to shuffle and
+ // use CVTSI2P.
+ assert(InVT == MVT::v2i64 && "Unexpected VT!");
+ SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
+ { 0, 2, -1, -1 });
+ return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
}
}
@@ -44481,7 +44491,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
- case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll
new file mode 100644
index 00000000000..bb51aced225
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define <4 x double> @autogen_SD30452(i1 %L230) {
+; CHECK-LABEL: autogen_SD30452:
+; CHECK: # %bb.0: # %BB
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
+; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: cvtsi2sd %rax, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; CHECK-NEXT: movq %xmm2, %rax
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: cvtsi2sd %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT: retq
+BB:
+ %I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3
+ %Shuff7 = shufflevector <4 x i64> %I, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+ br label %CF242
+
+CF242: ; preds = %CF242, %BB
+ %FC125 = sitofp <4 x i64> %Shuff7 to <4 x double>
+ ret <4 x double> %FC125
+}
OpenPOWER on IntegriCloud