summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
authorSam Parker <sam.parker@arm.com>2016-08-02 12:44:27 +0000
committerSam Parker <sam.parker@arm.com>2016-08-02 12:44:27 +0000
commit18bc3a002ed51c9eeef7a591c36baf930d6b8c8f (patch)
tree27430af11f44f08f7250f6cdf2b13d6d3044814e /llvm/test/CodeGen/ARM
parent78692ea590046c7c9a66e58a14165ff6b16b5ae5 (diff)
downloadbcm5719-llvm-18bc3a002ed51c9eeef7a591c36baf930d6b8c8f.tar.gz
bcm5719-llvm-18bc3a002ed51c9eeef7a591c36baf930d6b8c8f.zip
[ARM] Improve smul* and smla* isel for Thumb2
Added (sra (shl x, 16), 16) to the sext_16_node PatLeaf for ARM to simplify some pattern matching. This has allowed several patterns for smul* and smla* to be removed as well as making it easier to add the matching for the corresponding instructions for Thumb2 targets. Also added two Pat classes that are predicated on Thumb2 with the hasDSP flag and UseMulOps flags. Updated the smul codegen test with the wider range of patterns plus the ThumbV6 and ThumbV6T2 targets. Differential Revision: https://reviews.llvm.org/D22908 llvm-svn: 277450
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r--llvm/test/CodeGen/ARM/smul.ll191
1 files changed, 157 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/ARM/smul.ll b/llvm/test/CodeGen/ARM/smul.ll
index 4e6f7d5c67b..3c187aa846d 100644
--- a/llvm/test/CodeGen/ARM/smul.ll
+++ b/llvm/test/CodeGen/ARM/smul.ll
@@ -1,43 +1,46 @@
; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6t2-none-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6-none-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6
-@x = weak global i16 0 ; <i16*> [#uses=1]
-@y = weak global i16 0 ; <i16*> [#uses=0]
-
-define i32 @f1(i32 %y) {
+define i32 @f1(i16 %x, i32 %y) {
; CHECK-LABEL: f1:
-; CHECK: smulbt
- %tmp = load i16, i16* @x ; <i16> [#uses=1]
- %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
- %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
- %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
- %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1]
- ret i32 %tmp4
+; CHECK-NOT: sxth
+; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smulbt|smultb}}
+ %tmp1 = sext i16 %x to i32
+ %tmp2 = ashr i32 %y, 16
+ %tmp3 = mul i32 %tmp2, %tmp1
+ ret i32 %tmp3
}
define i32 @f2(i32 %x, i32 %y) {
; CHECK-LABEL: f2:
; CHECK: smultt
- %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
- %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
- %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+; CHECK-THUMBV6-NOT: smultt
+ %tmp1 = ashr i32 %x, 16
+ %tmp3 = ashr i32 %y, 16
+ %tmp4 = mul i32 %tmp3, %tmp1
ret i32 %tmp4
}
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK-LABEL: f3:
-; CHECK: smlabt
- %tmp = sext i16 %x to i32 ; <i32> [#uses=1]
- %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
- %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
- %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1]
+; CHECK-NOT: sxth
+; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smlabt|smlatb}}
+ %tmp = sext i16 %x to i32
+ %tmp2 = ashr i32 %y, 16
+ %tmp3 = mul i32 %tmp2, %tmp
+ %tmp5 = add i32 %tmp3, %a
ret i32 %tmp5
}
define i32 @f4(i32 %a, i32 %x, i32 %y) {
; CHECK-LABEL: f4:
; CHECK: smlatt
+; CHECK-THUMBV6-NOT: smlatt
%tmp1 = ashr i32 %x, 16
%tmp3 = ashr i32 %y, 16
%tmp4 = mul i32 %tmp3, %tmp1
@@ -47,7 +50,9 @@ define i32 @f4(i32 %a, i32 %x, i32 %y) {
define i32 @f5(i32 %a, i16 %x, i16 %y) {
; CHECK-LABEL: f5:
+; CHECK-NOT: sxth
; CHECK: smlabb
+; CHECK-THUMBV6-NOT: smlabb
%tmp1 = sext i16 %x to i32
%tmp3 = sext i16 %y to i32
%tmp4 = mul i32 %tmp3, %tmp1
@@ -55,19 +60,22 @@ define i32 @f5(i32 %a, i16 %x, i16 %y) {
ret i32 %tmp5
}
-define i32 @f6(i32 %a, i16 %x, i32 %y) {
+define i32 @f6(i32 %a, i32 %x, i16 %y) {
; CHECK-LABEL: f6:
-; CHECK: smlabt
- %tmp1 = sext i16 %x to i32
- %tmp3 = ashr i32 %y, 16
- %tmp4 = mul i32 %tmp3, %tmp1
- %tmp5 = add i32 %tmp4, %a
+; CHECK-NOT: sxth
+; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smlatb|smlabt}}
+ %tmp1 = sext i16 %y to i32
+ %tmp2 = ashr i32 %x, 16
+ %tmp3 = mul i32 %tmp2, %tmp1
+ %tmp5 = add i32 %tmp3, %a
ret i32 %tmp5
}
define i32 @f7(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: f7:
-; CHECK: smlawb
+; CHECK: smlawb r0, r0, r1, r2
+; CHECK-THUMBV6-NOT: smlawb
%shl = shl i32 %b, 16
%shr = ashr exact i32 %shl, 16
%conv = sext i32 %a to i64
@@ -81,7 +89,9 @@ define i32 @f7(i32 %a, i32 %b, i32 %c) {
define i32 @f8(i32 %a, i16 signext %b, i32 %c) {
; CHECK-LABEL: f8:
-; CHECK: smlawb
+; CHECK-NOT: sxth
+; CHECK: smlawb r0, r0, r1, r2
+; CHECK-THUMBV6-NOT: smlawb
%conv = sext i32 %a to i64
%conv1 = sext i16 %b to i64
%mul = mul nsw i64 %conv1, %conv
@@ -93,7 +103,8 @@ define i32 @f8(i32 %a, i16 signext %b, i32 %c) {
define i32 @f9(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: f9:
-; CHECK: smlawt
+; CHECK: smlawt r0, r0, r1, r2
+; CHECK-THUMBV6-NOT: smlawt
%conv = sext i32 %a to i64
%shr = ashr i32 %b, 16
%conv1 = sext i32 %shr to i64
@@ -104,9 +115,10 @@ define i32 @f9(i32 %a, i32 %b, i32 %c) {
ret i32 %add
}
-define i32 @f10(i32 %a, i32 %b, i32 %c) {
+define i32 @f10(i32 %a, i32 %b) {
; CHECK-LABEL: f10:
-; CHECK: smulwb
+; CHECK: smulwb r0, r0, r1
+; CHECK-THUMBV6-NOT: smulwb
%shl = shl i32 %b, 16
%shr = ashr exact i32 %shl, 16
%conv = sext i32 %a to i64
@@ -117,9 +129,11 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) {
ret i32 %conv4
}
-define i32 @f11(i32 %a, i16 signext %b, i32 %c) {
+define i32 @f11(i32 %a, i16 signext %b) {
; CHECK-LABEL: f11:
-; CHECK: smulwb
+; CHECK-NOT: sxth
+; CHECK: smulwb r0, r0, r1
+; CHECK-THUMBV6-NOT: smulwb
%conv = sext i32 %a to i64
%conv1 = sext i16 %b to i64
%mul = mul nsw i64 %conv1, %conv
@@ -128,9 +142,10 @@ define i32 @f11(i32 %a, i16 signext %b, i32 %c) {
ret i32 %conv2
}
-define i32 @f12(i32 %a, i32 %b, i32 %c) {
+define i32 @f12(i32 %a, i32 %b) {
; CHECK-LABEL: f12:
-; CHECK: smulwt
+; CHECK: smulwt r0, r0, r1
+; CHECK-THUMBV6-NOT: smulwt
%conv = sext i32 %a to i64
%shr = ashr i32 %b, 16
%conv1 = sext i32 %shr to i64
@@ -139,3 +154,111 @@ define i32 @f12(i32 %a, i32 %b, i32 %c) {
%conv3 = trunc i64 %shr25 to i32
ret i32 %conv3
}
+
+define i32 @f13(i32 %x, i16 %y) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: sxth
+; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smultb|smulbt}}
+ %tmp1 = sext i16 %y to i32
+ %tmp2 = ashr i32 %x, 16
+ %tmp3 = mul i32 %tmp2, %tmp1
+ ret i32 %tmp3
+}
+
+define i32 @f14(i32 %x, i32 %y) {
+; CHECK-LABEL: f14:
+; CHECK-NOT: sxth
+; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smultb|smulbt}}
+ %tmp1 = shl i32 %y, 16
+ %tmp2 = ashr i32 %tmp1, 16
+ %tmp3 = ashr i32 %x, 16
+ %tmp4 = mul i32 %tmp3, %tmp2
+ ret i32 %tmp4
+}
+
+define i32 @f15(i32 %x, i32 %y) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: sxth
+; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smulbt|smultb}}
+ %tmp1 = shl i32 %x, 16
+ %tmp2 = ashr i32 %tmp1, 16
+ %tmp3 = ashr i32 %y, 16
+ %tmp4 = mul i32 %tmp2, %tmp3
+ ret i32 %tmp4
+}
+
+define i32 @f16(i16 %x, i16 %y) {
+; CHECK-LABEL: f16:
+; CHECK-NOT: sxth
+; CHECK: smulbb
+; CHECK-THUMBV6-NOT: smulbb
+ %tmp1 = sext i16 %x to i32
+ %tmp2 = sext i16 %y to i32
+ %tmp3 = mul i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @f17(i32 %x, i32 %y) {
+; CHECK-LABEL: f17:
+; CHECK: smulbb
+; CHECK-THUMBV6-NOT: smulbb
+ %tmp1 = shl i32 %x, 16
+ %tmp2 = shl i32 %y, 16
+ %tmp3 = ashr i32 %tmp1, 16
+ %tmp4 = ashr i32 %tmp2, 16
+ %tmp5 = mul i32 %tmp3, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @f18(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: f18:
+; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smlabt|smlatb}}
+ %tmp0 = shl i32 %x, 16
+ %tmp1 = ashr i32 %tmp0, 16
+ %tmp2 = ashr i32 %y, 16
+ %tmp3 = mul i32 %tmp2, %tmp1
+ %tmp5 = add i32 %tmp3, %a
+ ret i32 %tmp5
+}
+
+define i32 @f19(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: f19:
+; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}}
+; CHECK-THUMBV6-NOT: {{smlatb|smlabt}}
+ %tmp0 = shl i32 %y, 16
+ %tmp1 = ashr i32 %tmp0, 16
+ %tmp2 = ashr i32 %x, 16
+ %tmp3 = mul i32 %tmp2, %tmp1
+ %tmp5 = add i32 %tmp3, %a
+ ret i32 %tmp5
+}
+
+define i32 @f20(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: f20:
+; CHECK: smlabb
+; CHECK-THUMBV6-NOT: smlabb
+ %tmp1 = shl i32 %x, 16
+ %tmp2 = ashr i32 %tmp1, 16
+ %tmp3 = shl i32 %y, 16
+ %tmp4 = ashr i32 %tmp3, 16
+ %tmp5 = mul i32 %tmp2, %tmp4
+ %tmp6 = add i32 %tmp5, %a
+ ret i32 %tmp6
+}
+
+define i32 @f21(i32 %a, i32 %x, i16 %y) {
+; CHECK-LABEL: f21:
+; CHECK-NOT: sxth
+; CHECK: smlabb
+; CHECK-THUMBV6-NOT: smlabb
+ %tmp1 = shl i32 %x, 16
+ %tmp2 = ashr i32 %tmp1, 16
+ %tmp3 = sext i16 %y to i32
+ %tmp4 = mul i32 %tmp2, %tmp3
+ %tmp5 = add i32 %a, %tmp4
+ ret i32 %tmp5
+}
OpenPOWER on IntegriCloud