summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2018-11-28 14:34:11 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2018-11-28 14:34:11 +0000
commit 579c2d9d64f891491cebcfcd953c827a34147479 (patch)
tree a0c2835e329a1bc4c1622216f1b360cbad3f6854 /llvm/test/Transforms
parent 30ce962732cc7d22249dc657141a6d7e39f9418e (diff)
download bcm5719-llvm-579c2d9d64f891491cebcfcd953c827a34147479.tar.gz
bcm5719-llvm-579c2d9d64f891491cebcfcd953c827a34147479.zip
[SLP] Fix PR39774: Set ReductionRoot if the original instruction is vectorized.
Summary: If the original reduction root instruction was vectorized, it might be removed from the tree. This means that the insertion point may become invalidated, and vectorizing the whole reduction then produces incorrect output. The ReductionRoot instruction must be marked as externally used so that it cannot be removed. Otherwise it might cause inconsistency with the cost model, and we may end up with an overly optimistic optimization.
Reviewers: RKSimon, spatel, hfinkel, mkuper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D54955
llvm-svn: 347759
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll 108
1 files changed, 108 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
new file mode 100644
index 00000000000..3ebccb04d07
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s
+
+define void @Test(i32) { ; PR39774 reproducer: a single never-exiting loop holding one long scalar `and` chain; %0 is the unnamed i32 argument
+; CHECK-LABEL: @Test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0
+; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]]
+; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55
+; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]]
+; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285
+; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]]
+; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT: [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240
+; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]]
+; CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496
+; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555
+; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529
+; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]]
+; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685
+; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]]
+; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ] ; loop-carried: 0 on entry, afterwards the chain result %val_42
+ %local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ] ; loop-carried: 0 on entry, afterwards %val_43
+ %val_0 = add i32 %local_4_39.us, 0
+ %val_1 = and i32 %local_8_43.us, %val_0 ; head of the chain: every later `and` takes the previous `and` as its first operand
+ %val_2 = and i32 %val_1, %0 ; most links combine with the function argument %0
+ %val_3 = and i32 %val_2, %0
+ %val_4 = and i32 %val_3, %0
+ %val_5 = and i32 %val_4, %0
+ %val_6 = add i32 %local_8_43.us, 55 ; the remaining links combine with %local_8_43.us plus a distinct constant
+ %val_7 = and i32 %val_5, %val_6
+ %val_8 = and i32 %val_7, %0
+ %val_9 = and i32 %val_8, %0
+ %val_10 = and i32 %val_9, %0
+ %val_11 = add i32 %local_8_43.us, 285
+ %val_12 = and i32 %val_10, %val_11
+ %val_13 = and i32 %val_12, %0
+ %val_14 = and i32 %val_13, %0
+ %val_15 = and i32 %val_14, %0
+ %val_16 = and i32 %val_15, %0
+ %val_17 = and i32 %val_16, %0
+ %val_18 = add i32 %local_8_43.us, 1240
+ %val_19 = and i32 %val_17, %val_18
+ %val_20 = add i32 %local_8_43.us, 1496
+ %val_21 = and i32 %val_19, %val_20
+ %val_22 = and i32 %val_21, %0
+ %val_23 = and i32 %val_22, %0
+ %val_24 = and i32 %val_23, %0
+ %val_25 = and i32 %val_24, %0
+ %val_26 = and i32 %val_25, %0
+ %val_27 = and i32 %val_26, %0
+ %val_28 = and i32 %val_27, %0
+ %val_29 = and i32 %val_28, %0
+ %val_30 = and i32 %val_29, %0
+ %val_31 = and i32 %val_30, %0
+ %val_32 = and i32 %val_31, %0
+ %val_33 = and i32 %val_32, %0
+ %val_34 = add i32 %local_8_43.us, 8555
+ %val_35 = and i32 %val_33, %val_34
+ %val_36 = and i32 %val_35, %0
+ %val_37 = and i32 %val_36, %0
+ %val_38 = and i32 %val_37, %0
+ %val_39 = add i32 %local_8_43.us, 12529
+ %val_40 = and i32 %val_38, %val_39
+ %val_41 = add i32 %local_8_43.us, 13685
+ %val_42 = and i32 %val_40, %val_41 ; last `and` of the chain; fed back into the first phi
+ %val_43 = add i32 %local_8_43.us, 14910 ; fed back into the second phi
+ br label %loop ; unconditional back edge: the loop has no exit
+}
OpenPOWER on IntegriCloud