summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp36
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp16
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.h4
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp79
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll30
5 files changed, 135 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 5deb6825148..cb120633a40 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -128,6 +128,42 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::MOVSLDUPrr:
+ case X86::VMOVSLDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSLDUPrm:
+ case X86::VMOVSLDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSHDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSHDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSLDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSLDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::MOVSHDUPrr:
+ case X86::VMOVSHDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSHDUPrm:
+ case X86::VMOVSHDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 5da63bef4d7..51d251e551c 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -63,6 +63,22 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i);
+ ShuffleMask.push_back(2 * i);
+ }
+}
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i + 1);
+ ShuffleMask.push_back(2 * i + 1);
+ }
+}
+
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 911e7047ee6..03a843e7b8d 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -38,6 +38,10 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4e94551020..04f1fafa2e7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5407,12 +5407,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
break;
+ case X86ISD::MOVSLDUP:
+ DecodeMOVSLDUPMask(VT, Mask);
+ break;
+ case X86ISD::MOVSHDUP:
+ DecodeMOVSHDUPMask(VT, Mask);
+ break;
case X86ISD::MOVDDUP:
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
@@ -19364,38 +19368,53 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Note that even with AVX we prefer the PSHUFD form of shuffle for integer
// vectors because it can have a load folded into it that UNPCK cannot. This
// doesn't preclude something switching to the shorter encoding post-RA.
- if (FloatDomain && (Mask.equals(0, 0) || Mask.equals(1, 1))) {
- bool Lo = Mask.equals(0, 0);
- unsigned Shuffle;
- MVT ShuffleVT;
- // Check if we have SSE3 which will let us use MOVDDUP. That instruction
- // is no slower than UNPCKLPD but has the option to fold the input operand
- // into even an unaligned memory load.
- if (Lo && Subtarget->hasSSE3()) {
- Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v2f64;
- } else {
- // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller
- // than the UNPCK variants.
- Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
- ShuffleVT = MVT::v4f32;
+ if (FloatDomain) {
+ if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
+ bool Lo = Mask.equals(0, 0);
+ unsigned Shuffle;
+ MVT ShuffleVT;
+ // Check if we have SSE3 which will let us use MOVDDUP. That instruction
+ // is no slower than UNPCKLPD but has the option to fold the input operand
+ // into even an unaligned memory load.
+ if (Lo && Subtarget->hasSSE3()) {
+ Shuffle = X86ISD::MOVDDUP;
+ ShuffleVT = MVT::v2f64;
+ } else {
+ // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller
+ // than the UNPCK variants.
+ Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
+ ShuffleVT = MVT::v4f32;
+ }
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ if (Shuffle == X86ISD::MOVDDUP)
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
+ else
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
}
- if (Depth == 1 && Root->getOpcode() == Shuffle)
- return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
- DCI.AddToWorklist(Op.getNode());
- if (Shuffle == X86ISD::MOVDDUP)
+ if (Subtarget->hasSSE3() &&
+ (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
+ bool Lo = Mask.equals(0, 0, 2, 2);
+ unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
+ MVT ShuffleVT = MVT::v4f32;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
- else
- Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
- DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
- /*AddTo*/ true);
- return true;
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
}
- // FIXME: We should match UNPCKLPS and UNPCKHPS here.
-
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index d5bb55a2caa..dcd8ab166cf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -119,6 +119,36 @@ define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
+define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: @shuffle_v4f32_0022
+; SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4f32_0022
+; SSE41: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4f32_0022
+; AVX1: vmovsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; AVX1-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: @shuffle_v4f32_1133
+; SSE2: shufps {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4f32_1133
+; SSE41: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4f32_1133
+; AVX1: vmovshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %shuffle
+}
define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: @shuffle_v4i32_0124
OpenPOWER on IntegriCloud