diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-10 16:42:57 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-10 16:42:57 +0000 |
| commit | fa508af0fec7a6efda5ae1be2dac4c43b4a0dfc8 (patch) | |
| tree | cd64293521a1231c62cdff8717f2f506792b04d4 /llvm/lib | |
| parent | d172270c44ad57827539e04bc244aa07e10d37e9 (diff) | |
| download | bcm5719-llvm-fa508af0fec7a6efda5ae1be2dac4c43b4a0dfc8.tar.gz bcm5719-llvm-fa508af0fec7a6efda5ae1be2dac4c43b4a0dfc8.zip | |
[X86] Improved target combine rules for selecting horizontal add/sub.
This patch slightly changes the algorithm introduced at revision 210477
to fix a problem where the algorithm was producing incorrect code for
the VEX.256 encoded versions of horizontal add/sub.
For these cases, we now split each of the two 256-bit input vectors into
128-bit halves, emit a 128-bit horizontal add/sub DAG node for each input,
and concatenate the two results.
Added a new test case to haddsub-2.ll.
llvm-svn: 210545
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 |
1 file changed, 20 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 93de0ea64f4..5bf18107500 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6061,9 +6061,9 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Try to match a horizontal ADD or SUB. if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) || - ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget->hasAVX()) || ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) || - ((VT == MVT::v8i32 || VT == MVT::v16i16) && Subtarget->hasAVX2())) { + ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || + VT == MVT::v16i16) && Subtarget->hasAVX())) { unsigned NumOperands = N->getNumOperands(); unsigned Opcode = N->getOperand(0)->getOpcode(); bool isCommutable = false; @@ -6131,6 +6131,24 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, case ISD::FSUB : NewOpcode = X86ISD::FHSUB; break; } + if (VT.is256BitVector()) { + SDLoc dl(N); + + // Convert this sequence into two horizontal add/sub followed + // by a concat vector. + SDValue InVec0_LO = Extract128BitVector(InVec0, 0, DAG, dl); + SDValue InVec0_HI = + Extract128BitVector(InVec0, NumOperands/2, DAG, dl); + SDValue InVec1_LO = Extract128BitVector(InVec1, 0, DAG, dl); + SDValue InVec1_HI = + Extract128BitVector(InVec1, NumOperands/2, DAG, dl); + EVT NewVT = InVec0_LO.getValueType(); + + SDValue LO = DAG.getNode(NewOpcode, dl, NewVT, InVec0_LO, InVec0_HI); + SDValue HI = DAG.getNode(NewOpcode, dl, NewVT, InVec1_LO, InVec1_HI); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LO, HI); + } + return DAG.getNode(NewOpcode, SDLoc(N), VT, InVec0, InVec1); } } |

