| author | Matthew Simpson <mssimpso@codeaurora.org> | 2018-03-23 14:18:27 +0000 |
|---|---|---|
| committer | Matthew Simpson <mssimpso@codeaurora.org> | 2018-03-23 14:18:27 +0000 |
| commit | 6c289a1c744802c317986989915cc328d25c210d (patch) | |
| tree | 925d62a708db48084aa7f09f1233baaca01c3e93 /llvm/test | |
| parent | a92bcbb2c8c5506d5af50a0d0c946144e212ebbb (diff) | |
| download | bcm5719-llvm-6c289a1c744802c317986989915cc328d25c210d.tar.gz bcm5719-llvm-6c289a1c744802c317986989915cc328d25c210d.zip | |
[SLP] Stop counting cost of gather sequences with multiple uses
When building the SLP tree, we look for reuse among the vectorized tree
entries. However, each gather sequence is represented by a unique tree entry,
even though the sequence may be identical to another one. This means, for
example, that a gather sequence with two uses will be counted twice when
computing the cost of the tree. We should only count the cost of the definition
of a gather sequence rather than its uses. During code generation, the
redundant gather sequences are emitted, but we optimize them away with CSE. So
it looks like this problem just affects the cost model.
Differential Revision: https://reviews.llvm.org/D44742
llvm-svn: 328316
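For illustration only (not part of this commit or its tests, and the function and value names below are made up): a minimal sketch of what a "gather sequence" looks like and how it acquires multiple uses. The sequence is the chain of insertelement instructions that assembles scalars into a vector; when two vectorized expressions need the same vector, codegen emits the chain redundantly and CSE folds the copies, so charging its cost once per using tree entry overstates the real cost.

```llvm
; Hypothetical IR, roughly the shape SLP produces when two vectorized
; expressions both need the scalars %a..%d gathered into a vector.
define <4 x i32> @gather_reused(i32 %a, i32 %b, i32 %c, i32 %d) {
  ; The gather sequence: one insertelement chain building <a, b, c, d>.
  %g0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %g1 = insertelement <4 x i32> %g0, i32 %b, i32 1
  %g2 = insertelement <4 x i32> %g1, i32 %c, i32 2
  %g3 = insertelement <4 x i32> %g2, i32 %d, i32 3
  ; Two users of the same gather. Counting the chain's cost for each user
  ; double-counts it, since only one copy of the chain survives CSE.
  %use1 = mul <4 x i32> %g3, %g3
  %use2 = add <4 x i32> %use1, %g3
  ret <4 x i32> %use2
}
```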
Diffstat (limited to 'llvm/test')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll | 40 |
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll | 15 |
2 files changed, 21 insertions, 34 deletions
```diff
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index aec33c57160..a4c655ca3cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -7,36 +7,22 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
 
 ; CHECK-LABEL: @gather_multiple_use(
-; CHECK-NEXT: [[TMP00:%.*]] = lshr i32 [[A:%.*]], 15
-; CHECK-NEXT: [[TMP01:%.*]] = and i32 [[TMP00]], 65537
-; CHECK-NEXT: [[TMP02:%.*]] = mul nuw i32 [[TMP01]], 65535
-; CHECK-NEXT: [[TMP03:%.*]] = add i32 [[TMP02]], [[A]]
-; CHECK-NEXT: [[TMP04:%.*]] = xor i32 [[TMP03]], [[TMP02]]
-; CHECK-NEXT: [[TMP05:%.*]] = lshr i32 [[C:%.*]], 15
-; CHECK-NEXT: [[TMP06:%.*]] = and i32 [[TMP05]], 65537
-; CHECK-NEXT: [[TMP07:%.*]] = mul nuw i32 [[TMP06]], 65535
-; CHECK-NEXT: [[TMP08:%.*]] = add i32 [[TMP07]], [[C]]
-; CHECK-NEXT: [[TMP09:%.*]] = xor i32 [[TMP08]], [[TMP07]]
-; CHECK-NEXT: [[TMP10:%.*]] = lshr i32 [[B:%.*]], 15
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 65537
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 65535
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[B]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = lshr i32 [[D:%.*]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65537
-; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 65535
-; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], [[D]]
-; CHECK-NEXT: [[TMP19:%.*]] = xor i32 [[TMP18]], [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP09]], [[TMP04]]
-; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], [[TMP14]]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], [[TMP19]]
-; CHECK-NEXT: ret i32 [[TMP22]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]])
+; CHECK-NEXT: ret i32 [[TMP10]]
 ;
 ; REMARK-LABEL: Function: gather_multiple_use
 ; REMARK: Args:
-; REMARK-NEXT: - String: Vectorizing horizontal reduction is possible
-; REMARK-NEXT: - String: 'but not beneficial with cost '
-; REMARK-NEXT: - Cost: '2'
+; REMARK-NEXT: - String: 'Vectorized horizontal reduction with cost '
+; REMARK-NEXT: - Cost: '-7'
 ;
 define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
   %tmp00 = lshr i32 %a, 15
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
index 00ac87d6a99..54dbc13934e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
@@ -141,15 +141,16 @@ define i8 @k_bb(<4 x i8> %x) {
 ; CHECK-NEXT: br label [[BB1:%.*]]
 ; CHECK: bb1:
 ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
 ; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
 ; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
-; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X]], [[X]]
-; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X0X0]], [[X3X3]]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sdiv i8 [[TMP2]], [[TMP5]]
-; CHECK-NEXT: ret i8 [[TMP6]]
+; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
+; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
+; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i8 [[TMP3]]
 ;
   %x0 = extractelement <4 x i8> %x, i32 0
   br label %bb1
```

