summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-11-21 14:53:03 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-11-21 14:53:03 +0000
commitce5a26b0e779afa1a001e5182fd47fa1f2cc658a (patch)
tree439dfb23e27413878d9a0428b08ffe619fb8555f /llvm/lib/Target
parentf769ae1ac43f8703328bbc9b12b0436c0e0ae0ba (diff)
downloadbcm5719-llvm-ce5a26b0e779afa1a001e5182fd47fa1f2cc658a.tar.gz
bcm5719-llvm-ce5a26b0e779afa1a001e5182fd47fa1f2cc658a.zip
[x86] Restructure the checking patterns for v16 and v32 avx2 vector
shuffle lowering to allow much better blend matching. Specifically, with the new structure the code seems clearer to me and we correctly can hit the cases where merging two 128-bit lanes is a clear win and can be shuffled cheaply afterward. llvm-svn: 222539
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp52
1 files changed, 24 insertions, 28 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 76f35070c63..4ec7fa15198 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10433,20 +10433,6 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, Subtarget, DAG))
return Broadcast;
- // There are no generalized cross-lane shuffle operations available on i16
- // element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
- // Try to simplify this by merging 128-bit lanes to enable a lane-based
- // shuffle.
- if (!isSingleInputShuffleMask(Mask))
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
- DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
- return Result;
-
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
- Mask, DAG);
- }
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -10466,6 +10452,12 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
if (isSingleInputShuffleMask(Mask)) {
+ // There are no generalized cross-lane shuffle operations available on i16
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
+ Mask, DAG);
+
SDValue PSHUFBMask[32];
for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
@@ -10486,6 +10478,12 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
}
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
}
@@ -10510,20 +10508,6 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, Subtarget, DAG))
return Broadcast;
- // There are no generalized cross-lane shuffle operations available on i8
- // element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
- // Try to simplify this by merging 128-bit lanes to enable a lane-based
- // shuffle.
- if (!isSingleInputShuffleMask(Mask))
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
- DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
- return Result;
-
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
- DAG);
- }
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -10547,6 +10531,12 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
if (isSingleInputShuffleMask(Mask)) {
+ // There are no generalized cross-lane shuffle operations available on i8
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
+ Mask, DAG);
+
SDValue PSHUFBMask[32];
for (int i = 0; i < 32; ++i)
PSHUFBMask[i] =
@@ -10559,6 +10549,12 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
}
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
}
OpenPOWER on IntegriCloud