summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-09-24 01:24:44 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-09-24 01:24:44 +0000
commit9bd10e7492a93da98951813b48e57671eab72e21 (patch)
tree6e12c106b74c108d7365e238ce5a96f83b2f3af7 /llvm/lib
parentfd11815a7d47a649d609d50cdde09313db67b815 (diff)
downloadbcm5719-llvm-9bd10e7492a93da98951813b48e57671eab72e21.tar.gz
bcm5719-llvm-9bd10e7492a93da98951813b48e57671eab72e21.zip
[x86] Teach the new vector shuffle lowering to lower v8i32 shuffles with
the native AVX2 instructions. Note that the test case is really frustrating here because VPERMD requires the mask to be in the register input and we don't produce a comment looking through that to the constant pool. I'm going to attempt to improve this in a subsequent commit, but I'm not sure if I will succeed. llvm-svn: 218347
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp55
1 files changed, 50 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 279c21db8b8..3ac4c6d5920 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9544,11 +9544,56 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
- // FIXME: Actually implement this using AVX2!!!
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8f32, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8f32, V2);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i32,
- DAG.getVectorShuffle(MVT::v8f32, DL, V1, V2, Mask));
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // If the shuffle mask is repeated in each 128-bit lane we can use more
+ // efficient instructions that mirror the shuffles across the two 128-bit
+ // lanes.
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
+ assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 10, 3, 11))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
+ }
+
+ // If the shuffle patterns aren't repeated but it is a single input, directly
+ // generate a cross-lane VPERMD instruction.
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue VPermMask[8];
+ for (int i = 0; i < 8; ++i)
+ VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
+ : DAG.getConstant(Mask[i], MVT::i32);
+ return DAG.getNode(
+ X86ISD::VPERMV, DL, MVT::v8i32,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
+ }
+
+ // Shuffle the input elements into the desired positions in V1 and V2 and
+ // blend them together.
+ int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V2Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int BlendMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ for (int i = 0; i < 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] < 8) {
+ V1Mask[i] = Mask[i];
+ BlendMask[i] = i;
+ } else if (Mask[i] >= 8) {
+ V2Mask[i] = Mask[i] - 8;
+ BlendMask[i] = i + 8;
+ }
+
+ V1 = DAG.getVectorShuffle(MVT::v8i32, DL, V1, DAG.getUNDEF(MVT::v8i32), V1Mask);
+ V2 = DAG.getVectorShuffle(MVT::v8i32, DL, V2, DAG.getUNDEF(MVT::v8i32), V2Mask);
+ return DAG.getVectorShuffle(MVT::v8i32, DL, V1, V2, BlendMask);
}
/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
OpenPOWER on IntegriCloud