summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorSjoerd Meijer <sjoerd.meijer@arm.com>2019-01-31 08:07:30 +0000
committerSjoerd Meijer <sjoerd.meijer@arm.com>2019-01-31 08:07:30 +0000
commitf7cc34cae890fdd711173fcb633cd262ee343764 (patch)
treeb5b705cfcf8aebd0e49feaadeadfb0e35370343a /llvm/test
parenta4938433720b54683f5b5da4ae4d1882c5a99766 (diff)
downloadbcm5719-llvm-f7cc34cae890fdd711173fcb633cd262ee343764.tar.gz
bcm5719-llvm-f7cc34cae890fdd711173fcb633cd262ee343764.zip
[SelectionDAG] Codesize: don't expand SHIFT to SHIFT_PARTS
And instead just generate a libcall. My motivating example on ARM was a simple shift, shl i64 %A, %B, for which the code bloat is quite significant. For other targets that also accept __int128/i128, such as AArch64 and X86, it is also beneficial to generate a libcall for these cases when optimising for minsize. On these 64-bit targets, 64-bit shifts are of course unaffected, because the SHIFT/SHIFT_PARTS lowering operation action is not set to custom/expand. Differential Revision: https://reviews.llvm.org/D57386 llvm-svn: 352736
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AArch64/shift_minsize.ll122
-rw-r--r--llvm/test/CodeGen/ARM/shift_minsize.ll32
-rw-r--r--llvm/test/CodeGen/X86/shift_minsize.ll134
3 files changed, 288 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
new file mode 100644
index 00000000000..d1b95e87577
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @f0(i64 %val, i64 %amt) minsize optsize { ; AArch64: i64 shl by a variable amount under minsize/optsize
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl x0, x0, x1
+; CHECK-NEXT: ret
+ %res = shl i64 %val, %amt ; shift under test; the expected output above is a single inline lsl, no call
+ ret i64 %res
+}
+
+define i32 @f1(i64 %x, i64 %y) minsize optsize { ; AArch64: i64 shl whose result is truncated to i32
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl x0, x0, x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %a = shl i64 %x, %y ; shift under test; expected output above keeps it as one inline lsl
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) minsize optsize { ; AArch64: i64 ashr whose result is truncated to i32
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: asr x0, x0, x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %a = ashr i64 %x, %y ; shift under test; expected output above keeps it as one inline asr
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) minsize optsize { ; AArch64: i64 lshr whose result is truncated to i32
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr x0, x0, x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %a = lshr i64 %x, %y ; shift under test; expected output above keeps it as one inline lsr
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; AArch64: i128 shl, value passed and returned as two i64 halves
+; CHECK-LABEL: shl128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT: bl __ashlti3
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shl = shl i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __ashlti3
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shl to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shl, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
+
+define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; AArch64: i128 ashr, value passed and returned as two i64 halves
+; CHECK-LABEL: ashr128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT: bl __ashrti3
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shr = ashr i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __ashrti3
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
+
+define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; AArch64: i128 lshr, value passed and returned as two i64 halves
+; CHECK-LABEL: lshr128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT: bl __lshrti3
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shr = lshr i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __lshrti3
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
diff --git a/llvm/test/CodeGen/ARM/shift_minsize.ll b/llvm/test/CodeGen/ARM/shift_minsize.ll
new file mode 100644
index 00000000000..4d10c64392d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/shift_minsize.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+
+define i64 @f0(i64 %val, i64 %amt) minsize optsize { ; ARM: i64 shl by a variable amount under minsize/optsize
+; CHECK-LABEL: f0:
+; CHECK: bl __aeabi_llsl
+ %res = shl i64 %val, %amt ; shift under test; expected output above contains a call to __aeabi_llsl
+ ret i64 %res
+}
+
+define i32 @f1(i64 %x, i64 %y) minsize optsize { ; ARM: i64 shl whose result is truncated to i32
+; CHECK-LABEL: f1:
+; CHECK: bl __aeabi_llsl
+ %a = shl i64 %x, %y ; shift under test; expected output above contains a call to __aeabi_llsl
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) minsize optsize { ; ARM: i64 ashr whose result is truncated to i32
+; CHECK-LABEL: f2:
+; CHECK: bl __aeabi_lasr
+ %a = ashr i64 %x, %y ; shift under test; expected output above contains a call to __aeabi_lasr
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) minsize optsize { ; ARM: i64 lshr whose result is truncated to i32
+; CHECK-LABEL: f3:
+; CHECK: bl __aeabi_llsr
+ %a = lshr i64 %x, %y ; shift under test; expected output above contains a call to __aeabi_llsr
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
diff --git a/llvm/test/CodeGen/X86/shift_minsize.ll b/llvm/test/CodeGen/X86/shift_minsize.ll
new file mode 100644
index 00000000000..5ba46544645
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shift_minsize.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+define i64 @f0(i64 %val, i64 %amt) minsize optsize { ; x86-64: i64 shl by a variable amount under minsize/optsize
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: shlq %cl, %rax
+; CHECK-NEXT: retq
+ %res = shl i64 %val, %amt ; shift under test; expected output above is an inline shlq by %cl, no call
+ ret i64 %res
+}
+
+define i32 @f1(i64 %x, i64 %y) minsize optsize { ; x86-64: i64 shl whose result is truncated to i32
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: shlq %cl, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %a = shl i64 %x, %y ; shift under test; expected output above is an inline shlq by %cl, no call
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) minsize optsize { ; x86-64: i64 ashr whose result is truncated to i32
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: sarq %cl, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %a = ashr i64 %x, %y ; shift under test; expected output above is an inline sarq by %cl, no call
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) minsize optsize { ; x86-64: i64 lshr whose result is truncated to i32
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: shrq %cl, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+ %a = lshr i64 %x, %y ; shift under test; expected output above is an inline shrq by %cl, no call
+ %b = trunc i64 %a to i32 ; only the low 32 bits are returned
+ ret i32 %b
+}
+
+define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; x86-64: i128 shl, value passed and returned as two i64 halves
+; CHECK-LABEL: shl128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: callq __ashlti3
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shl = shl i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __ashlti3
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shl to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shl, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
+
+define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; x86-64: i128 ashr, value passed and returned as two i64 halves
+; CHECK-LABEL: ashr128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __ashrti3
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shr = ashr i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __ashrti3. NOTE(review): unlike shl128/lshr128 in this file there is no movzbl of %dl before the call - presumably the amount is passed as signed here and %y is already signext; confirm
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
+
+define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize { ; x86-64: i128 lshr, value passed and returned as two i64 halves
+; CHECK-LABEL: lshr128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: callq __lshrti3
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
+ %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64 ; %x.coerce1 becomes bits 64..127
+ %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
+ %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext ; full i128 operand: %x.coerce1 high, %x.coerce0 low
+ %conv = sext i8 %y to i32
+ %sh_prom = zext i32 %conv to i128 ; shift amount widened to the operand type
+ %shr = lshr i128 %x.sroa.0.0.insert.insert, %sh_prom ; shift under test; expected output above is a call to __lshrti3
+ %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64 ; low 64 bits of the result
+ %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
+ %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64 ; high 64 bits of the result
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ; returned pair is { low, high }
+ ret { i64, i64 } %.fca.1.insert
+}
+
OpenPOWER on IntegriCloud