[CGP] Ensure sinking multiple instructions does not invalidate dominance checks

In MVE, as of rL371218, we are attempting to sink chains of instructions such as:

  %l1 = insertelement <8 x i8> undef, i8 %l0, i32 0
  %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer

In certain situations though, we can end up breaking the dominance relations of instructions. This happens when we sink the instructions into a loop, but cannot remove the originals. The Use is updated, which might in fact be a Use from the second instruction to the first.

This attempts to fix that by reversing the order of the instructions that are sunk, and ensuring that we update the uses on the new instructions if they have already been sunk, not the old ones.

Differential Revision: https://reviews.llvm.org/D67366
llvm-svn: 371743
author: David Green <david.green@arm.com> 2019-09-12 16:00:07 +0000
committer: David Green <david.green@arm.com> 2019-09-12 16:00:07 +0000
commit: a6e944b1731107c647df7aa2c9b026d6a9818c90 (patch)
tree: 27c933cbad64ff05a7987fbeedc239c932f71d0c /llvm/test/Transforms
parent: af11cc7eb5da320066d88a8f6d015e6296f0da25 (diff)
download: bcm5719-llvm-a6e944b1731107c647df7aa2c9b026d6a9818c90.tar.gz
bcm5719-llvm-a6e944b1731107c647df7aa2c9b026d6a9818c90.zip
2 files changed, 112 insertions, 2 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll
index 739877b3f0c..cb0737ffaeb 100644
--- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll
@@ -45,9 +45,12 @@ define void @sink_add_mul_multiple(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2
 ; CHECK-NOT:  [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
 ; CHECK-NOT:  [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
 ; CHECK:    vector.body:
-; CHECK:      [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
+; CHECK:      [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
 ; CHECK:      [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK:      [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK:      mul nsw <4 x i32> %wide.load, [[TMP3]]
+; CHECK:      [[TMP2b:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
+; CHECK:      [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK:      mul nsw <4 x i32> %wide.load18, [[TMP3b]]
 ;
 entry:
   %cmp13 = icmp sgt i32 %n, 0
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain.ll
new file mode 100644
index 00000000000..c0da3eb5666
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s
+
+; Sink the shufflevector/insertelement pair, followed by the trunc. The original instructions end up dead.
+define signext i8 @dead(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
+; CHECK-LABEL: @dead(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i16 [[X:%.*]] to i8
+; CHECK-NEXT:    [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
+; CHECK-NEXT:    [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
+; CHECK-NEXT:    [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
+; CHECK-NEXT:    store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+  %n.vec = and i32 %n, -8
+  %l0 = trunc i16 %x to i8 ; start of the trunc -> insertelement -> shufflevector chain, defined outside the loop
+  %l1 = insertelement <8 x i8> undef, i8 %l0, i32 0 ; places the scalar in lane 0 of an otherwise undef vector
+  %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer ; all-zero mask selects lane 0 of %l1 for every lane
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
+  %l6 = getelementptr inbounds i16, i16* %s1, i32 %index
+  %l7 = bitcast i16* %l6 to <8 x i16>*
+  %wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
+  %l8 = trunc <8 x i16> %wide.load to <8 x i8>
+  %l9 = mul <8 x i8> %broadcast.splat26, %l8 ; only user of %broadcast.splat26 in this function; the expected output rebuilds the whole chain in this block
+  %l13 = getelementptr inbounds i8, i8* %d, i32 %index
+  %l14 = bitcast i8* %l13 to <8 x i8>*
+  store <8 x i8> %l9, <8 x i8>* %l14, align 1
+  %index.next = add i32 %index, 8
+  %l15 = icmp eq i32 %index.next, %n.vec
+  br i1 %l15, label %exit, label %vector.body
+
+exit:                                     ; preds = %vector.body
+  ret i8 0
+}
+
+; Same as above, but the shuffle has an extra use, meaning it shouldn't be deleted.
+define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
+; CHECK-LABEL: @alive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
+; CHECK-NEXT:    [[L0:%.*]] = trunc i16 [[X:%.*]] to i8
+; CHECK-NEXT:    [[L1:%.*]] = insertelement <8 x i8> undef, i8 [[L0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT26:%.*]] = shufflevector <8 x i8> [[L1]], <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[L2:%.*]] = sub <8 x i8> zeroinitializer, [[BROADCAST_SPLAT26]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i16 [[X]] to i8
+; CHECK-NEXT:    [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
+; CHECK-NEXT:    [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
+; CHECK-NEXT:    [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
+; CHECK-NEXT:    store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+  %n.vec = and i32 %n, -8
+  %l0 = trunc i16 %x to i8 ; start of the trunc -> insertelement -> shufflevector chain, defined outside the loop
+  %l1 = insertelement <8 x i8> undef, i8 %l0, i32 0 ; places the scalar in lane 0 of an otherwise undef vector
+  %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer ; all-zero mask selects lane 0 of %l1 for every lane
+  %l2 = sub <8 x i8> zeroinitializer, %broadcast.splat26 ; second user of the splat, located in entry; the expected output keeps the original chain here
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
+  %l6 = getelementptr inbounds i16, i16* %s1, i32 %index
+  %l7 = bitcast i16* %l6 to <8 x i16>*
+  %wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
+  %l8 = trunc <8 x i16> %wide.load to <8 x i8>
+  %l9 = mul <8 x i8> %broadcast.splat26, %l8 ; in-loop user of the splat; the expected output feeds it from a copy of the chain built in this block
+  %l13 = getelementptr inbounds i8, i8* %d, i32 %index
+  %l14 = bitcast i8* %l13 to <8 x i8>*
+  store <8 x i8> %l9, <8 x i8>* %l14, align 1
+  %index.next = add i32 %index, 8
+  %l15 = icmp eq i32 %index.next, %n.vec
+  br i1 %l15, label %exit, label %vector.body
+
+exit:                                     ; preds = %vector.body
+  ret i8 0
+}
author	David Green <david.green@arm.com>	2019-09-12 16:00:07 +0000
committer	David Green <david.green@arm.com>	2019-09-12 16:00:07 +0000
commit	a6e944b1731107c647df7aa2c9b026d6a9818c90 (patch)
tree	27c933cbad64ff05a7987fbeedc239c932f71d0c /llvm/test/Transforms
parent	af11cc7eb5da320066d88a8f6d015e6296f0da25 (diff)
download	bcm5719-llvm-a6e944b1731107c647df7aa2c9b026d6a9818c90.tar.gz bcm5719-llvm-a6e944b1731107c647df7aa2c9b026d6a9818c90.zip