summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-08-04 01:14:24 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-08-04 01:14:24 +0000
commit37a18821cd8b0df5ac877659d2d397f0e268407c (patch)
treed537d3e1d271f6cc2dea8589b603911cec9b57c3
parent7bbfd245b08fda1503ec7e3c6954c100235cdee2 (diff)
downloadbcm5719-llvm-37a18821cd8b0df5ac877659d2d397f0e268407c.tar.gz
bcm5719-llvm-37a18821cd8b0df5ac877659d2d397f0e268407c.zip
[x86] Handle single input shuffles in the SSSE3 case more intelligently.
I spent some time looking into a better or more principled way to handle this. For example, by detecting arbitrary "unneeded" ORs... But really, there wasn't any point. We just shouldn't build blatantly wrong code so late in the pipeline rather than adding more stages and logic later on to fix it. Avoiding this is just too simple. llvm-svn: 214680
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp4
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll4
2 files changed, 4 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5ae752c1da5..94c19fb4c40 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7931,6 +7931,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (isSingleInputShuffleMask(Mask))
+ return V1; // Single inputs are easy.
+
+ // Otherwise, blend the two.
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 49620e93d6f..693a2764d43 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -179,9 +179,7 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(
;
; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i8> %shuffle
@@ -275,9 +273,7 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
;
; SSSE3-LABEL: @trunc_v4i32_shuffle
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
OpenPOWER on IntegriCloud