diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 36 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 79 |
4 files changed, 105 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index 5deb6825148..cb120633a40 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -128,6 +128,42 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::MOVSLDUPrr: + case X86::VMOVSLDUPrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::MOVSLDUPrm: + case X86::VMOVSLDUPrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask); + break; + + case X86::VMOVSHDUPYrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VMOVSHDUPYrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask); + break; + + case X86::VMOVSLDUPYrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VMOVSLDUPYrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask); + break; + + case X86::MOVSHDUPrr: + case X86::VMOVSHDUPrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::MOVSHDUPrm: + case X86::VMOVSHDUPrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask); + break; + case X86::PALIGNR128rr: case X86::VPALIGNR128rr: Src1Name = getRegName(MI->getOperand(2).getReg()); diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 5da63bef4d7..51d251e551c 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -63,6 +63,22 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back(NElts+i); } +void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + for (int i = 0, e = NumElts / 2; i < e; ++i) { + ShuffleMask.push_back(2 * i); + ShuffleMask.push_back(2 * i); + } +} + +void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + for (int i = 0, e = NumElts / 2; i < e; ++i) { + ShuffleMask.push_back(2 * i + 1); + ShuffleMask.push_back(2 * i + 1); + } +} + void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index 911e7047ee6..03a843e7b8d 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -38,6 +38,10 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); +void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); + +void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); + void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a4e94551020..04f1fafa2e7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5407,12 +5407,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); if (Mask.empty()) return false; break; + case X86ISD::MOVSLDUP: + DecodeMOVSLDUPMask(VT, Mask); + break; + case X86ISD::MOVSHDUP: + DecodeMOVSHDUPMask(VT, Mask); + break; case X86ISD::MOVDDUP: case X86ISD::MOVLHPD: case X86ISD::MOVLPD: case X86ISD::MOVLPS: - case X86ISD::MOVSHDUP: - case X86ISD::MOVSLDUP: // Not yet implemented return false; default: llvm_unreachable("unknown target shuffle node"); @@ -19364,38 +19368,53 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // Note that even with AVX we prefer the PSHUFD form of shuffle for integer // vectors because it can have a load folded into it that UNPCK cannot. This // doesn't preclude something switching to the shorter encoding post-RA. - if (FloatDomain && (Mask.equals(0, 0) || Mask.equals(1, 1))) { - bool Lo = Mask.equals(0, 0); - unsigned Shuffle; - MVT ShuffleVT; - // Check if we have SSE3 which will let us use MOVDDUP. That instruction - // is no slower than UNPCKLPD but has the option to fold the input operand - // into even an unaligned memory load. - if (Lo && Subtarget->hasSSE3()) { - Shuffle = X86ISD::MOVDDUP; - ShuffleVT = MVT::v2f64; - } else { - // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller - // than the UNPCK variants. - Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS; - ShuffleVT = MVT::v4f32; + if (FloatDomain) { + if (Mask.equals(0, 0) || Mask.equals(1, 1)) { + bool Lo = Mask.equals(0, 0); + unsigned Shuffle; + MVT ShuffleVT; + // Check if we have SSE3 which will let us use MOVDDUP. That instruction + // is no slower than UNPCKLPD but has the option to fold the input operand + // into even an unaligned memory load. + if (Lo && Subtarget->hasSSE3()) { + Shuffle = X86ISD::MOVDDUP; + ShuffleVT = MVT::v2f64; + } else { + // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller + // than the UNPCK variants. + Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS; + ShuffleVT = MVT::v4f32; + } + if (Depth == 1 && Root->getOpcode() == Shuffle) + return false; // Nothing to do! + Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + DCI.AddToWorklist(Op.getNode()); + if (Shuffle == X86ISD::MOVDDUP) + Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); + else + Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); + DCI.AddToWorklist(Op.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + /*AddTo*/ true); + return true; } - if (Depth == 1 && Root->getOpcode() == Shuffle) - return false; // Nothing to do! - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); - DCI.AddToWorklist(Op.getNode()); - if (Shuffle == X86ISD::MOVDDUP) + if (Subtarget->hasSSE3() && + (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) { + bool Lo = Mask.equals(0, 0, 2, 2); + unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP; + MVT ShuffleVT = MVT::v4f32; + if (Depth == 1 && Root->getOpcode() == Shuffle) + return false; // Nothing to do! + Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); - else - Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); - DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), - /*AddTo*/ true); - return true; + DCI.AddToWorklist(Op.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + /*AddTo*/ true); + return true; + } } - // FIXME: We should match UNPCKLPS and UNPCKHPS here. - // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK // variants as none of these have single-instruction variants that are // superior to the UNPCK formulation. |