summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-08-16 17:35:08 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-08-16 17:35:08 +0000
commit8ff1b7de4dae74f84ce7f419920434b66d62d07a (patch)
tree230731da53949d13c6cd16aaa6212f2c67740e38
parent2632c677f85cba1ac2aef5d68aaf8af0f5b3c944 (diff)
downloadbcm5719-llvm-8ff1b7de4dae74f84ce7f419920434b66d62d07a.tar.gz
bcm5719-llvm-8ff1b7de4dae74f84ce7f419920434b66d62d07a.zip
[X86] combineExtractWithShuffle - handle extract(truncate(x), 0)
Eventually we need to generalize combineExtractWithShuffle to handle all faux shuffles and handle truncate (and X86ISD::VTRUNC etc.) there, but we're not ready yet (still creates nodes on the fly, incomplete DemandedElts support, bad use of recursive Depth limit). llvm-svn: 369134
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp12
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-mul.ll18
2 files changed, 15 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 297fa942c7e..a6131f9e4b5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35202,6 +35202,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDLoc dl(N);
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
@@ -35223,6 +35224,16 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, SrcOp);
}
+ // Handle extract(truncate(x)) for 0'th index.
+ // TODO: Treat this as a faux shuffle?
+ // TODO: When can we use this for general indices?
+ if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() &&
+ isNullConstant(Idx)) {
+ Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
+ Src = DAG.getBitcast(SrcVT, Src);
+ return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx);
+ }
+
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
@@ -35260,7 +35271,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue();
int SrcIdx = Mask[N->getConstantOperandVal(1)];
- SDLoc dl(N);
// If the shuffle source element is undef/zero then we can just accept it.
if (SrcIdx == SM_SentinelUndef)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index b4cca0015bd..b70f964a542 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -1828,9 +1828,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
@@ -1852,9 +1850,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper
@@ -1879,10 +1875,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
@@ -1907,10 +1900,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQVL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
OpenPOWER on IntegriCloud