Diffstat (limited to 'llvm/test')
 llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll  | 77 ++++++++
 llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll | 74 ++++++++
 llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll       | 67 +++++++
 llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll            | 57 ++++++
4 files changed, 275 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
new file mode 100644
index 00000000000..f2380f23b8e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; Verify that for the architectures that are known to have poor latency
+; double precision shift instructions we generate an alternative sequence
+; of instructions with lower latencies instead of the shld instruction.
+
+;uint64_t lshift1(uint64_t a, uint64_t b)
+;{
+; return (a << 1) | (b >> 63);
+;}
+
+; CHECK: lshift1:
+; CHECK: addq {{.*}},{{.*}}
+; CHECK-NEXT: shrq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+  %shl = shl i64 %a, 1
+  %shr = lshr i64 %b, 63
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+;uint64_t lshift2(uint64_t a, uint64_t b)
+;{
+; return (a << 2) | (b >> 62);
+;}
+
+; CHECK: lshift2:
+; CHECK: shlq $2, {{.*}}
+; CHECK-NEXT: shrq $62, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+  %shl = shl i64 %a, 2
+  %shr = lshr i64 %b, 62
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+;uint64_t lshift7(uint64_t a, uint64_t b)
+;{
+; return (a << 7) | (b >> 57);
+;}
+
+; CHECK: lshift7:
+; CHECK: shlq $7, {{.*}}
+; CHECK-NEXT: shrq $57, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift7(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+  %shl = shl i64 %a, 7
+  %shr = lshr i64 %b, 57
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+;uint64_t lshift63(uint64_t a, uint64_t b)
+;{
+; return (a << 63) | (b >> 1);
+;}
+
+; CHECK: lshift63:
+; CHECK: shlq $63, {{.*}}
+; CHECK-NEXT: shrq {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift63(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+  %shl = shl i64 %a, 63
+  %shr = lshr i64 %b, 1
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
new file mode 100644
index 00000000000..5edaad89df4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; Verify that for the architectures that are known to have poor latency
+; double precision shift instructions we generate an alternative sequence
+; of instructions with lower latencies instead of the shrd instruction.
+
+;uint64_t rshift1(uint64_t a, uint64_t b)
+;{
+; return (a >> 1) | (b << 63);
+;}
+
+; CHECK: rshift1:
+; CHECK: shrq {{.*}}
+; CHECK-NEXT: shlq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
+  %1 = lshr i64 %a, 1
+  %2 = shl i64 %b, 63
+  %3 = or i64 %2, %1
+  ret i64 %3
+}
+
+;uint64_t rshift2(uint64_t a, uint64_t b)
+;{
+; return (a >> 2) | (b << 62);
+;}
+
+; CHECK: rshift2:
+; CHECK: shrq $2, {{.*}}
+; CHECK-NEXT: shlq $62, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
+  %1 = lshr i64 %a, 2
+  %2 = shl i64 %b, 62
+  %3 = or i64 %2, %1
+  ret i64 %3
+}
+
+;uint64_t rshift7(uint64_t a, uint64_t b)
+;{
+; return (a >> 7) | (b << 57);
+;}
+
+; CHECK: rshift7:
+; CHECK: shrq $7, {{.*}}
+; CHECK-NEXT: shlq $57, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
+  %1 = lshr i64 %a, 7
+  %2 = shl i64 %b, 57
+  %3 = or i64 %2, %1
+  ret i64 %3
+}
+
+;uint64_t rshift63(uint64_t a, uint64_t b)
+;{
+; return (a >> 63) | (b << 1);
+;}
+
+; CHECK: rshift63:
+; CHECK: shrq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+; CHECK-NEXT: orq {{.*}}, {{.*}}
+
+define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
+  %1 = lshr i64 %a, 63
+  %2 = shl i64 %b, 1
+  %3 = or i64 %2, %1
+  ret i64 %3
+}
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
new file mode 100644
index 00000000000..5d7a10b5901
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+
+; clang -Oz -c test1.cpp -emit-llvm -S -o
+; Verify that we generate a shld instruction when we are optimizing for size,
+; even for X86_64 processors that are known to have poor latency double
+; precision shift instructions.
+; uint64_t lshift10(uint64_t a, uint64_t b)
+; {
+; return (a << 10) | (b >> 54);
+; }
+
+; Function Attrs: minsize nounwind optsize readnone uwtable
+define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
+entry:
+; CHECK: shldq $10
+  %shl = shl i64 %a, 10
+  %shr = lshr i64 %b, 54
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+attributes #0 = { minsize nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+; clang -Os -c test2.cpp -emit-llvm -S
+; Verify that we generate a shld instruction when we are optimizing for size,
+; even for X86_64 processors that are known to have poor latency double
+; precision shift instructions.
+; uint64_t lshift11(uint64_t a, uint64_t b)
+; {
+; return (a << 11) | (b >> 53);
+; }
+
+; Function Attrs: nounwind optsize readnone uwtable
+define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 {
+entry:
+; CHECK: shldq $11
+  %shl = shl i64 %a, 11
+  %shr = lshr i64 %b, 53
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; clang -O2 -c test2.cpp -emit-llvm -S
+; Verify that we do not generate a shld instruction when we are not optimizing
+; for size for X86_64 processors that are known to have poor latency double
+; precision shift instructions.
+; uint64_t lshift12(uint64_t a, uint64_t b)
+; {
+; return (a << 12) | (b >> 52);
+; }
+
+; Function Attrs: nounwind readnone uwtable
+define i64 @_Z8lshift12mm(i64 %a, i64 %b) #2 {
+entry:
+; CHECK: shlq $12
+; CHECK-NEXT: shrq $52
+  %shl = shl i64 %a, 12
+  %shr = lshr i64 %b, 52
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
new file mode 100644
index 00000000000..5bab434ae6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=x86-64 -mcpu=athlon | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-tbird | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-4 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-xp | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-mp | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=k8 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=opteron | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-fx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=k8-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=opteron-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon64-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=amdfam10 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=btver1 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s
+
+; Verify that for the X86_64 processors that are known to have poor latency
+; double precision shift instructions we do not generate 'shld' or 'shrd'
+; instructions.
+
+;uint64_t lshift(uint64_t a, uint64_t b, int c)
+;{
+; return (a << c) | (b >> (64-c));
+;}
+
+define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
+entry:
+; CHECK-NOT: shld
+  %sh_prom = zext i32 %c to i64
+  %shl = shl i64 %a, %sh_prom
+  %sub = sub nsw i32 64, %c
+  %sh_prom1 = zext i32 %sub to i64
+  %shr = lshr i64 %b, %sh_prom1
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+;uint64_t rshift(uint64_t a, uint64_t b, int c)
+;{
+; return (a >> c) | (b << (64-c));
+;}
+
+define i64 @rshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
+entry:
+; CHECK-NOT: shrd
+  %sh_prom = zext i32 %c to i64
+  %shr = lshr i64 %a, %sh_prom
+  %sub = sub nsw i32 64, %c
+  %sh_prom1 = zext i32 %sub to i64
+  %shl = shl i64 %b, %sh_prom1
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+
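For reference, a minimal C sketch (not part of the patch) of the pattern the lshift1 test above checks, assuming the SysV x86-64 calling convention; the register names are illustrative.

#include <stdint.h>

/* On bdver1 the test expects the shld-free sequence, roughly:
 *   addq %rdi, %rdi          # a << 1 computed as a + a
 *   shrq $63, %rsi           # b >> 63
 *   leaq (%rsi,%rdi), %rax   # the OR folded into LEA, which is legal here
 *                            #   because the two operands share no set bits
 * instead of the shorter but higher-latency
 *   shldq $1, %rsi, %rdi
 */
uint64_t lshift1(uint64_t a, uint64_t b) {
    return (a << 1) | (b >> 63);
}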

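The variable-count test exercises the same idiom with a non-constant shift amount; below is a hedged C sketch of that pattern, with the caveat that the expression is only well defined for counts in 1..63.

#include <stdint.h>

/* Variable-count double shift, as in x86-64-double-shifts-var.ll above.
 * The test only asserts that no shld/shrd is emitted on the listed CPUs;
 * the IR it contains is this same expansion into two single shifts plus
 * an or.  Note c must stay in 1..63: for c == 0 the (64 - c) shift is
 * undefined behavior in C (and yields poison in LLVM IR). */
uint64_t lshift_var(uint64_t a, uint64_t b, int c) {
    return (a << c) | (b >> (64 - c));
}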
