diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2018-11-28 14:34:11 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-11-28 14:34:11 +0000 |
| commit | 579c2d9d64f891491cebcfcd953c827a34147479 (patch) | |
| tree | a0c2835e329a1bc4c1622216f1b360cbad3f6854 /llvm/test/Transforms | |
| parent | 30ce962732cc7d22249dc657141a6d7e39f9418e (diff) | |
| download | bcm5719-llvm-579c2d9d64f891491cebcfcd953c827a34147479.tar.gz bcm5719-llvm-579c2d9d64f891491cebcfcd953c827a34147479.zip | |
[SLP] Fix PR39774: Set ReductionRoot if the original instruction is vectorized.
Summary:
If the original reduction root instruction was vectorized, it might be
removed from the tree. This means that the insertion point may become
invalidated, and the vectorization of the whole reduction then produces
an incorrect result.
The ReductionRoot instruction must be marked as externally used so that
it cannot be removed. Otherwise, it might cause an inconsistency with the
cost model, and we may end up with an overly optimistic optimization.
Reviewers: RKSimon, spatel, hfinkel, mkuper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D54955
llvm-svn: 347759
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll new file mode 100644 index 00000000000..3ebccb04d07 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s + +define void @Test(i32) { +; CHECK-LABEL: @Test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0 +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]] +; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55 +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]] +; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285 +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]] +; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; CHECK-NEXT: [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240 +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]] +; 
CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496 +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555 +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529 +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]] +; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685 +; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]] +; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ] + %local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ] + %val_0 = add i32 %local_4_39.us, 0 + %val_1 = and i32 %local_8_43.us, %val_0 + %val_2 = and i32 %val_1, %0 + %val_3 = and i32 %val_2, %0 + %val_4 = and i32 %val_3, %0 + %val_5 = and i32 %val_4, %0 + %val_6 = add i32 %local_8_43.us, 55 + %val_7 = and i32 %val_5, %val_6 + %val_8 = and i32 %val_7, %0 + %val_9 = and i32 %val_8, 
%0 + %val_10 = and i32 %val_9, %0 + %val_11 = add i32 %local_8_43.us, 285 + %val_12 = and i32 %val_10, %val_11 + %val_13 = and i32 %val_12, %0 + %val_14 = and i32 %val_13, %0 + %val_15 = and i32 %val_14, %0 + %val_16 = and i32 %val_15, %0 + %val_17 = and i32 %val_16, %0 + %val_18 = add i32 %local_8_43.us, 1240 + %val_19 = and i32 %val_17, %val_18 + %val_20 = add i32 %local_8_43.us, 1496 + %val_21 = and i32 %val_19, %val_20 + %val_22 = and i32 %val_21, %0 + %val_23 = and i32 %val_22, %0 + %val_24 = and i32 %val_23, %0 + %val_25 = and i32 %val_24, %0 + %val_26 = and i32 %val_25, %0 + %val_27 = and i32 %val_26, %0 + %val_28 = and i32 %val_27, %0 + %val_29 = and i32 %val_28, %0 + %val_30 = and i32 %val_29, %0 + %val_31 = and i32 %val_30, %0 + %val_32 = and i32 %val_31, %0 + %val_33 = and i32 %val_32, %0 + %val_34 = add i32 %local_8_43.us, 8555 + %val_35 = and i32 %val_33, %val_34 + %val_36 = and i32 %val_35, %0 + %val_37 = and i32 %val_36, %0 + %val_38 = and i32 %val_37, %0 + %val_39 = add i32 %local_8_43.us, 12529 + %val_40 = and i32 %val_38, %val_39 + %val_41 = add i32 %local_8_43.us, 13685 + %val_42 = and i32 %val_40, %val_41 + %val_43 = add i32 %local_8_43.us, 14910 + br label %loop +} |

