summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 35
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 10
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 27
3 files changed, 36 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0c7d4bcabe8..cff91d6e3ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30279,23 +30279,26 @@ static SDValue combineX86ShufflesRecursively(
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
- int InputIdx0 = -1, InputIdx1 = -1;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- SDValue BC = peekThroughBitcasts(Ops[i]);
- if (Input0 && BC == peekThroughBitcasts(Input0))
- InputIdx0 = i;
- if (Input1 && BC == peekThroughBitcasts(Input1))
- InputIdx1 = i;
- }
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint = -1) -> int {
+ if (!Input)
+ return -1;
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- if (Input0 && InputIdx0 < 0) {
- InputIdx0 = SrcOpIndex;
- Ops[SrcOpIndex] = Input0;
- }
- if (Input1 && InputIdx1 < 0) {
- InputIdx1 = Ops.size();
- Ops.push_back(Input1);
- }
+ int InputIdx0 = AddOp(Input0, SrcOpIndex);
+ int InputIdx1 = AddOp(Input1);
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 05d19f315b0..122c77aefcc 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -923,18 +923,12 @@ define <32 x i8> @combine_pshufb_pshufb_or_as_unpcklbw(<32 x i8> %a0, <32 x i8>
define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_pshufb_or_pshufb:
; X32: # %bb.0:
-; X32-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19]
-; X32-NEXT: vpor %ymm0, %ymm1, %ymm0
-; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_pshufb_or_pshufb:
; X64: # %bb.0:
-; X64-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19]
-; X64-NEXT: vpor %ymm0, %ymm1, %ymm0
-; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1>)
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3>)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index b67f9e266eb..a1316eb41f8 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -707,20 +707,23 @@ define <16 x i8> @combine_pshufb_pshufb_or_as_unpcklbw(<16 x i8> %a0, <16 x i8>
define <16 x i8> @combine_pshufb_pshufb_or_pshufb(<16 x i8> %a0) {
; SSE-LABEL: combine_pshufb_pshufb_or_pshufb:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[0,1,2,3],zero,zero,zero,zero
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_pshufb_pshufb_or_pshufb:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3]
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX512F-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1>)
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3>)
%3 = or <16 x i8> %1, %2
OpenPOWER on IntegriCloud