diff options
author | Sam Tebbs <sam.tebbs@arm.com> | 2019-09-06 16:01:32 +0000 |
---|---|---|
committer | Sam Tebbs <sam.tebbs@arm.com> | 2019-09-06 16:01:32 +0000 |
commit | f1cdd95a2fe79fbcd7fd440509a754bc3afaf088 (patch) | |
tree | f36f31741b8883c8c19f035cdc26ee3c9f4ee927 /llvm/test/Transforms/CodeGenPrepare | |
parent | 8d30c1dcec2a935e0b1cffc26fdc6054ff101f53 (diff) | |
download | bcm5719-llvm-f1cdd95a2fe79fbcd7fd440509a754bc3afaf088.tar.gz bcm5719-llvm-f1cdd95a2fe79fbcd7fd440509a754bc3afaf088.zip |
[ARM] Sink add/mul(shufflevector(insertelement())) for MVE instruction selection
This patch sinks add/mul(shufflevector(insertelement())) into the basic block in which they are used so that they can then be selected together.
This is useful for various MVE instructions, such as vmla and others that take R registers.
Loop tests have been added to the vmla test file to make sure vmlas are generated in loops.
Differential revision: https://reviews.llvm.org/D66295
llvm-svn: 371218
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare')
-rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll new file mode 100644 index 00000000000..739877b3f0c --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector.ll @@ -0,0 +1,216 @@ +; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s + +define void @sink_add_mul(i32* %s1, i32 %x, i32* %d, i32 %n) { +; CHECK-LABEL: @sink_add_mul( +; CHECK: vector.ph: +; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: vector.body: +; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0 + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %s1, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = mul nsw <4 x i32> %wide.load, %broadcast.splat9 + %3 = getelementptr inbounds i32, i32* %d, i32 %index + %4 = bitcast i32* %3 to <4 x i32>* + %wide.load10 = load <4 x i32>, <4 x i32>* %4, align 4 + %5 = add nsw <4 x i32> %wide.load10, %2 + %6 = bitcast i32* %3 to <4 x i32>* + store <4 x i32> %5, <4 x i32>* %6, align 4 + %index.next = add i32 %index, 4 + %7 = icmp eq i32 %index.next, %n.vec + br i1 %7, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void +} + +define void @sink_add_mul_multiple(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2, i32 %n) { +; CHECK-LABEL: @sink_add_mul_multiple( +; CHECK: vector.ph: +; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: vector.body: +; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0 + %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %s1, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = mul nsw <4 x i32> %wide.load, %broadcast.splat16 + %3 = getelementptr inbounds i32, i32* %d, i32 %index + %4 = bitcast i32* %3 to <4 x i32>* + %wide.load17 = load <4 x i32>, <4 x i32>* %4, align 4 + %5 = add nsw <4 x i32> %wide.load17, %2 + %6 = bitcast i32* %3 to <4 x i32>* + store <4 x i32> %5, <4 x i32>* %6, align 4 + %7 = getelementptr inbounds i32, i32* %s2, i32 %index + %8 = bitcast i32* %7 to <4 x i32>* + %wide.load18 = load <4 x i32>, <4 x i32>* %8, align 4 + %9 = mul nsw <4 x i32> %wide.load18, %broadcast.splat16 + %10 = getelementptr inbounds i32, i32* %d2, i32 %index + %11 = bitcast i32* %10 to <4 x i32>* + %wide.load19 = load <4 x i32>, <4 x i32>* %11, align 4 + %12 = add nsw <4 x i32> %wide.load19, %9 + %13 = bitcast i32* %10 to <4 x i32>* + store <4 x i32> %12, <4 x i32>* %13, align 4 + %index.next = add i32 %index, 4 + %14 = icmp eq i32 %index.next, %n.vec + br i1 %14, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void +} + + +define void @sink_add_sub_unsinkable(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2, i32 %n) { +; CHECK-LABEL: @sink_add_sub_unsinkable( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP13]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0 + %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %s1, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = mul nsw <4 x i32> %wide.load, %broadcast.splat16 + %3 = getelementptr inbounds i32, i32* %d, i32 %index + %4 = bitcast i32* %3 to <4 x i32>* + %wide.load17 = load <4 x i32>, <4 x i32>* %4, align 4 + %5 = add nsw <4 x i32> %wide.load17, %2 + %6 = bitcast i32* %3 to <4 x i32>* + store <4 x i32> %5, <4 x i32>* %6, align 4 + %7 = getelementptr inbounds i32, i32* %s2, i32 %index + %8 = bitcast i32* %7 to <4 x i32>* + %wide.load18 = load <4 x i32>, <4 x i32>* %8, align 4 + %9 = sub nsw <4 x i32> %broadcast.splat16, %wide.load18 + %10 = getelementptr inbounds i32, i32* %d2, i32 %index + %11 = bitcast i32* %10 to <4 x i32>* + %wide.load19 = load <4 x i32>, <4 x i32>* %11, align 4 + %12 = add nsw <4 x i32> %wide.load19, %9 + %13 = bitcast i32* %10 to <4 x i32>* + store <4 x i32> %12, <4 x i32>* %13, align 4 + %index.next = add i32 %index, 4 + %14 = icmp eq i32 %index.next, %n.vec + br i1 %14, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void +} + +define void @sink_sub(i32* %s1, i32 %x, i32* %d, i32 %n) { +; CHECK-LABEL: @sink_sub( +; CHECK: vector.ph: +; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: vector.body: +; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0 + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %s1, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = sub nsw <4 x i32> %wide.load, %broadcast.splat9 + %3 = getelementptr inbounds i32, i32* %d, i32 %index + %4 = bitcast i32* %3 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %4, align 4 + %index.next = add i32 %index, 4 + %5 = icmp eq i32 %index.next, %n.vec + br i1 %5, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void +} + +define void @sink_sub_unsinkable(i32* %s1, i32 %x, i32* %d, i32 %n) { +entry: +; CHECK-LABEL: @sink_sub_unsinkable( +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NOT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NOT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0 + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %s1, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = sub nsw <4 x i32> %broadcast.splat9, %wide.load + %3 = getelementptr inbounds i32, i32* %d, i32 %index + %4 = bitcast i32* %3 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %4, align 4 + %index.next = add i32 %index, 4 + %5 = icmp eq i32 %index.next, %n.vec + br i1 %5, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void +} |