diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2018-01-24 17:50:53 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-01-24 17:50:53 +0000 |
commit | 4bd8e5332fa5e697bec7ce2cc6642b9708e36e46 (patch) | |
tree | 97c530d3db7577c85395faf3c707660b7611a50d /llvm/test | |
parent | abdcc613b6f7a8ff4463f3adc19cc9cc46e91d23 (diff) | |
download | bcm5719-llvm-4bd8e5332fa5e697bec7ce2cc6642b9708e36e46.tar.gz bcm5719-llvm-4bd8e5332fa5e697bec7ce2cc6642b9708e36e46.zip |
[SLP] Fix for PR32086: Count InsertElementInstr of the same elements as shuffle.
Summary:
If the same value is going to be vectorized several times in the same
tree entry, this entry is considered to be a gather entry and cost of
this gather is counter as cost of InsertElementInstrs for each gathered
value. But we can consider these elements as ShuffleInstr with
SK_PermuteSingle shuffle kind.
Reviewers: spatel, RKSimon, mkuper, hfinkel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38697
llvm-svn: 323348
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll | 11 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll | 20 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/hoist.ll | 17 |
3 files changed, 24 insertions, 24 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll index ee9ee7c34de..7613fadb5ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll @@ -4,15 +4,14 @@ define void @i64_simplified(i64* noalias %st, i64* noalias %ld) { ; CHECK-LABEL: @i64_simplified( ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 -; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8 -; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 -; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8 -; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX3]], align 8 -; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8 -; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8 ; CHECK-NEXT: ret void ; %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll index 22dfffa722a..00ac87d6a99 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll @@ -137,17 +137,19 @@ define i8 @k(<4 x i8> %x) { define i8 @k_bb(<4 x i8> %x) { ; CHECK-LABEL: @k_bb( +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2> -; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]] -; CHECK-NEXT: ret i8 [[TMP8]] +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3 +; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]] +; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]] +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X0X0]], [[X3X3]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = sdiv i8 [[TMP2]], [[TMP5]] +; CHECK-NEXT: ret i8 [[TMP6]] ; %x0 = extractelement <4 x i8> %x, i32 0 br label %bb1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll index 7d0379eab90..885d11acfa1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -16,19 +16,18 @@ target triple = "i386-apple-macosx10.9.0" define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[K]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] |