author     Simon Pilgrim <llvm-dev@redking.me.uk>    2019-03-30 17:12:29 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2019-03-30 17:12:29 +0000
commit     10c9032c02394c6dd6102f4387cd315f19738616 (patch)
tree       4b9bc7d660c8ae2a7f31aab5872f75b23ebbc0d4
parent     b5498cbf64e56af50d3c9ca9b415ffbb5faa00d9 (diff)
[X86][SSE] detectAVGPattern - Match zext(or(x,y)) 'add like' patterns (PR41316)
Fixes PR41316, where the expanded PAVG intrinsic had had one of its ADDs turned into an OR because its operands were known to have no conflicting bits (which makes the OR equivalent to an ADD).

llvm-svn: 357351
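For background on the fix: when two values share no set bits, or(x,y) and add(x,y) produce identical results, which is exactly what the DAG.haveNoCommonBitsSet() guard in the patch checks. A minimal standalone C++ sketch of that equivalence (illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // x only uses the high nibble, y only the low nibble, so the two
    // values have no common (conflicting) bits.
    uint16_t x = 0x00A0, y = 0x0005;
    assert((x & y) == 0);       // no common bits set
    assert((x | y) == x + y);   // hence or(x,y) == add(x,y)
    return 0;
  }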
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 31
-rw-r--r--  llvm/test/CodeGen/X86/avg.ll            | 66
2 files changed, 28 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c62e661b5a..c085bc557ee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38203,10 +38203,19 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                             AVGBuilder);
   }
 
-  // Matches 'add like' patterns.
-  // TODO: Extend this to include or/zext cases.
+  // Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
+  // Match the or case only if it's 'add-like' - can be replaced by an add.
   auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
-    if (ISD::ADD != V.getOpcode())
+    if (ISD::ADD == V.getOpcode()) {
+      Op0 = V.getOperand(0);
+      Op1 = V.getOperand(1);
+      return true;
+    }
+    if (ISD::ZERO_EXTEND != V.getOpcode())
+      return false;
+    V = V.getOperand(0);
+    if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
+        !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
       return false;
     Op0 = V.getOperand(0);
     Op1 = V.getOperand(1);
@@ -38222,7 +38231,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   Operands[1] = Op1;
 
   // Now we have three operands of two additions. Check that one of them is a
-  // constant vector with ones, and the other two are promoted from i8/i16.
+  // constant vector with ones, and the other two can be promoted from i8/i16.
   for (int i = 0; i < 3; ++i) {
     if (!IsConstVectorInRange(Operands[i], 1, 1))
       continue;
@@ -38230,14 +38239,16 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
 
     // Check if Operands[0] and Operands[1] are results of type promotion.
     for (int j = 0; j < 2; ++j)
-      if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
-          Operands[j].getOperand(0).getValueType() != VT)
-        return SDValue();
+      if (Operands[j].getValueType() != VT) {
+        if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+            Operands[j].getOperand(0).getValueType() != VT)
+          return SDValue();
+        Operands[j] = Operands[j].getOperand(0);
+      }
 
     // The pattern is detected, emit X86ISD::AVG instruction(s).
-    return SplitOpsAndApply(DAG, Subtarget, DL, VT,
-                            { Operands[0].getOperand(0),
-                              Operands[1].getOperand(0) }, AVGBuilder);
+    return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
+                            AVGBuilder);
   }
 
   return SDValue();
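The matcher now accepts both add(Op0,Op1) and zext(or(Op0,Op1)); in the or case FindAddLike strips the zext and hands back operands that are already at type VT, which is why the promotion loop above only unwraps a ZERO_EXTEND when an operand is not yet at VT. A rough per-lane model of the pattern being matched in the PR41316 case (a standalone sketch assuming PAVGW's documented round-up-average semantics, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Per-lane PAVGW semantics: round-up average computed in a wider type.
  static uint16_t avgw(uint16_t a, uint16_t b) {
    return uint16_t((uint32_t(a) + uint32_t(b) + 1) >> 1);
  }

  int main() {
    // 'a' has its low bit known zero (e.g. it came from a shl by 2), so
    // the rounding '+1' can legally show up as 'or a, 1' in the DAG.
    uint16_t a = uint16_t(0x1234 << 2), b = uint16_t(0x0F0F << 2);
    assert((a & 1) == 0 && (a | 1) == a + 1);
    // add(zext(or(a, 1)), zext(b)) >> 1 is therefore the same rounding
    // average that the matcher can now lower to a single PAVGW.
    assert(uint16_t((uint32_t(a | 1) + uint32_t(b)) >> 1) == avgw(a, b));
    return 0;
  }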
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 15074221827..0494b0aeda9 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -2479,67 +2479,15 @@ define <2 x i64> @PR41316(<2 x i64>, <2 x i64>) {
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: psllw $2, %xmm0
 ; SSE2-NEXT: psllw $2, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE2-NEXT: por {{.*}}(%rip), %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT: paddd %xmm3, %xmm4
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: paddd %xmm1, %xmm0
-; SSE2-NEXT: pslld $15, %xmm4
-; SSE2-NEXT: psrad $16, %xmm4
-; SSE2-NEXT: pslld $15, %xmm0
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: packssdw %xmm4, %xmm0
+; SSE2-NEXT: pavgw %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
-; AVX1-LABEL: PR41316:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: PR41316:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $2, %xmm0, %xmm0
-; AVX2-NEXT: vpsllw $2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: PR41316:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0
-; AVX512-NEXT: vpsllw $2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: PR41316:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
+; AVX-NEXT: vpavgw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
 %3 = bitcast <2 x i64> %0 to <8 x i16>
 %4 = shl <8 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 %5 = bitcast <2 x i64> %1 to <8 x i16>
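Per lane, the PR41316 test shifts both inputs left by 2 and then takes their rounding average, so the old expansion (which folded the rounding bit into the 'por' removed above) and the new single 'pavgw' must produce identical results. A quick standalone equivalence check (a hypothetical harness, not part of the LLVM test suite):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Sweep lane values with coprime strides to keep the check fast.
    for (uint32_t x = 0; x < 0x10000; x += 251) {
      for (uint32_t y = 0; y < 0x10000; y += 257) {
        uint16_t a = uint16_t(x << 2), b = uint16_t(y << 2);
        // Old lowering: rounding '+1' folded into an or (bit 0 of 'a'
        // is zero after the shift), then widen, add, shift, truncate.
        uint16_t old_way = uint16_t((uint32_t(a | 1) + uint32_t(b)) >> 1);
        // New lowering: per-lane PAVGW semantics.
        uint16_t new_way = uint16_t((uint32_t(a) + uint32_t(b) + 1) >> 1);
        assert(old_way == new_way);
      }
    }
    return 0;
  }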