summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp51
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining.ll54
2 files changed, 61 insertions, 44 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e6844e556b1..6a62bf15929 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16696,11 +16696,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected extract_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
- // --> (vector_shuffle X, Y)
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+ // InsIndex)
+ // --> (vector_shuffle X, Y) and variations where shuffle operands may be
+ // CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
@@ -16713,18 +16717,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
- int XOffset = -1;
- if (InsertVal.getOperand(0) == X) {
- XOffset = 0;
- } else if (InsertVal.getOperand(0) == Y) {
- XOffset = X.getValueType().getVectorNumElements();
+ SDValue InsertVal0 = InsertVal.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
+
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
+ }
+
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
+ }
}
- if (XOffset != -1) {
+ if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index b5dff70e234..8cf8cab8b79 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2914,56 +2914,40 @@ define <8 x i16> @shuffle_extract_insert_double(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_extract_concat_insert(<4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i16> %b) {
; SSE2-LABEL: shuffle_extract_concat_insert:
; SSE2: # %bb.0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: pextrw $2, %xmm1, %ecx
-; SSE2-NEXT: pextrw $5, %xmm2, %edx
-; SSE2-NEXT: pextrw $7, %xmm2, %esi
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7]
-; SSE2-NEXT: pinsrw $4, %ecx, %xmm0
-; SSE2-NEXT: pinsrw $5, %edx, %xmm0
-; SSE2-NEXT: pinsrw $6, %eax, %xmm0
-; SSE2-NEXT: pinsrw $7, %esi, %xmm0
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_extract_concat_insert:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pextrw $2, %xmm1, %eax
-; SSSE3-NEXT: pextrw $5, %xmm2, %ecx
-; SSSE3-NEXT: pextrw $7, %xmm2, %edx
-; SSSE3-NEXT: movd %xmm1, %esi
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15]
-; SSSE3-NEXT: pinsrw $4, %eax, %xmm0
-; SSSE3-NEXT: pinsrw $5, %ecx, %xmm0
-; SSSE3-NEXT: pinsrw $6, %esi, %xmm0
-; SSSE3-NEXT: pinsrw $7, %edx, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_extract_concat_insert:
; SSE41: # %bb.0:
-; SSE41-NEXT: movd %xmm1, %eax
-; SSE41-NEXT: pextrw $2, %xmm1, %ecx
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15]
-; SSE41-NEXT: pinsrw $4, %ecx, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7]
-; SSE41-NEXT: pinsrw $6, %eax, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_extract_concat_insert:
; AVX: # %bb.0:
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: vpextrw $2, %xmm1, %ecx
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15]
-; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7]
-; AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15]
+; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
+; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
%a = shufflevector <4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%a0 = extractelement <8 x i16> %a, i32 0
OpenPOWER on IntegriCloud