summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-10-11 17:54:15 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-10-11 17:54:15 +0000
commitaf6c15f67916c30acda3aed908382230882ee03c (patch)
treedeb6b4791401654f20f3a7199d24eec8795ba259
parent6aacd968754341a0743a427d842cacb91637279c (diff)
downloadbcm5719-llvm-af6c15f67916c30acda3aed908382230882ee03c.tar.gz
bcm5719-llvm-af6c15f67916c30acda3aed908382230882ee03c.zip
[X86][SSE] Add support for v4i8 add reduction
llvm-svn: 374579
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp9
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-add.ll69
2 files changed, 33 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ad0f7134c25..38b6b96f3d7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36239,10 +36239,15 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
SDLoc DL(ExtElt);
- if (VecVT == MVT::v8i8) {
+ // vXi8 reduction - sub 128-bit vector.
+ if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
+ // Pad with zero.
+ if (VecVT == MVT::v4i8)
+ Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
+ DAG.getConstant(0, DL, VecVT));
// Pad with undef.
Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
- DAG.getUNDEF(VecVT));
+ DAG.getUNDEF(MVT::v8i8));
Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
DAG.getConstant(0, DL, MVT::v16i8));
Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll
index 86c8b982cb0..eb124651003 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll
@@ -1029,44 +1029,36 @@ define i8 @test_v2i8_load(<2 x i8>* %p) {
define i8 @test_v4i8(<4 x i8> %a0) {
; SSE2-LABEL: test_v4i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: paddb %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: paddb %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psadbw %xmm0, %xmm1
+; SSE41-NEXT: pextrb $0, %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -1078,36 +1070,28 @@ define i8 @test_v4i8_load(<4 x i8>* %p) {
; SSE2-LABEL: test_v4i8_load:
; SSE2: # %bb.0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: paddb %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: paddb %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psadbw %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i8_load:
; SSE41: # %bb.0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: paddb %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: paddb %xmm1, %xmm0
-; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psadbw %xmm0, %xmm1
+; SSE41-NEXT: pextrb $0, %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4i8_load:
; AVX: # %bb.0:
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -1115,10 +1099,9 @@ define i8 @test_v4i8_load(<4 x i8>* %p) {
; AVX512-LABEL: test_v4i8_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
OpenPOWER on IntegriCloud