summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZvi Rackover <zvi.rackover@intel.com>2018-01-11 18:02:33 +0000
committerZvi Rackover <zvi.rackover@intel.com>2018-01-11 18:02:33 +0000
commit999e6c296791cbf9b73ada4d0c8f57a9692974af (patch)
tree2d9dcbbc0e5a6ad1dd9f98437b7833beeb1f29e6
parentbe6fa82ee56ea48068061d1fe77138f9b5bc1c51 (diff)
downloadbcm5719-llvm-999e6c296791cbf9b73ada4d0c8f57a9692974af.tar.gz
bcm5719-llvm-999e6c296791cbf9b73ada4d0c8f57a9692974af.zip
DAGCombine: Let truncates negate extension through extract-subvector
Summary: Fold cases such as: (v8i8 truncate (v8i32 extract_subvector (v16i32 sext (v16i8 V), Idx))) -> (v8i8 extract_subvector (v16i8 V), Idx) This can be generalized to cases where the truncate and extend do not fully cancel each other out, but it may require querying the target about profitability. Reviewers: RKSimon, craig.topper, spatel, efriedma Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41927 llvm-svn: 322300
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp16
-rw-r--r--llvm/test/CodeGen/X86/trunc-subvector.ll87
2 files changed, 37 insertions, 66 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1820d5f944f..4ae22d69eaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8779,6 +8779,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
+ // fold (truncate (extract_subvector(ext x))) ->
+ // (extract_subvector x)
+ // TODO: This can be generalized to cover cases where the truncate and extract
+ // do not fully cancel each other out.
+ if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) {
+ if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+ VT.getVectorElementType())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+ N00.getOperand(0), N0.getOperand(1));
+ }
+ }
+
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll
index 35e6040081a..332bf58fa8f 100644
--- a/llvm/test/CodeGen/X86/trunc-subvector.ll
+++ b/llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -7,26 +7,13 @@
define <4 x i32> @test1(<8 x i32> %v) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
-; AVX2-LABEL: test1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
-; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: test1:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%x = sext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%t = trunc <4 x i64> %s to <4 x i32>
@@ -36,29 +23,14 @@ define <4 x i32> @test1(<8 x i32> %v) {
define <4 x i32> @test2(<8 x i32> %v) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; AVX2-LABEL: test2:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test2:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: test2:
+; AVX: # %bb.0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%x = sext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%t = trunc <4 x i64> %s to <4 x i32>
@@ -164,19 +136,11 @@ define <4 x i32> @test6(<8 x i32> %v) {
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
-; AVX2-LABEL: test6:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test6:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: test6:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%x = zext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%t = trunc <4 x i64> %s to <4 x i32>
@@ -189,20 +153,11 @@ define <4 x i32> @test7(<8 x i32> %v) {
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; AVX2-LABEL: test7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test7:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: test7:
+; AVX: # %bb.0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%x = zext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%t = trunc <4 x i64> %s to <4 x i32>
OpenPOWER on IntegriCloud