diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-08-06 21:43:15 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-08-06 21:43:15 +0000 |
| commit | ecc1e5d476581ea85777656e17e3c2a61a874566 (patch) | |
| tree | 4230a51c55cfecbe66cccf30a548c46e7e414fdc | |
| parent | 906e727972d1f4873d006630d6aa6795a7025a1c (diff) | |
| download | bcm5719-llvm-ecc1e5d476581ea85777656e17e3c2a61a874566.tar.gz bcm5719-llvm-ecc1e5d476581ea85777656e17e3c2a61a874566.zip | |
[X86] Don't allow combineSIntToFP to create v2i32 vectors after type legalization.
If we're after type legalization we should only be trying to turn
v2i64 into v2i32. So bitcast to v4i32, shuffle the even elements
together. Then use X86ISD::CVTSI2P. The alternative is to leave
the v2i64 type alone and let it be scalarized. Hopefully keeping
it packed is better.
Fixes PR42905.
llvm-svn: 368091
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr42905.ll | 26 |
2 files changed, 40 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 43bf7289c16..826ce263aca 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42848,6 +42848,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, } static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. @@ -42877,13 +42878,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, unsigned BitWidth = InVT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); if (NumSignBits >= (BitWidth - 31)) { - EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32); + EVT TruncVT = MVT::i32; if (InVT.isVector()) TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, InVT.getVectorNumElements()); SDLoc dl(N); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + } + // If we're after legalize and the type is v2i32 we need to shuffle and + // use CVTSI2P. 
+ assert(InVT == MVT::v2i64 && "Unexpected VT!"); + SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, + { 0, 2, -1, -1 }); + return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); } } @@ -44481,7 +44491,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); + case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll new file mode 100644 index 00000000000..bb51aced225 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr42905.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define <4 x double> @autogen_SD30452(i1 %L230) { +; CHECK-LABEL: autogen_SD30452: +; CHECK: # %bb.0: # %BB +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829] +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: cvtsi2sd %rax, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; CHECK-NEXT: movq %xmm2, %rax +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2sd %rax, %xmm2 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1 +; CHECK-NEXT: retq +BB: + %I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3 + %Shuff7 = shufflevector <4 x i64> %I, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef> + br label %CF242 + +CF242: ; preds = %CF242, %BB + %FC125 = sitofp <4 x i64> %Shuff7 to <4 x double> + ret 
<4 x double> %FC125 +} |

