Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll   | 77
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll  | 74
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll        | 67
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll             | 57
4 files changed, 275 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
new file mode 100644
index 00000000000..f2380f23b8e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; Verify that, for architectures known to have high-latency double-precision
+; shift instructions, we generate an alternative sequence of lower-latency
+; instructions instead of a shld instruction.
+
+;uint64_t lshift1(uint64_t a, uint64_t b)
+;{
+; return (a << 1) | (b >> 63);
+;}
+
+; CHECK: lshift1:
+; CHECK: addq {{.*}},{{.*}}
+; CHECK-NEXT: shrq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+ %shl = shl i64 %a, 1
+ %shr = lshr i64 %b, 63
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+;uint64_t lshift2(uint64_t a, uint64_t b)
+;{
+; return (a << 2) | (b >> 62);
+;}
+
+; CHECK: lshift2:
+; CHECK: shlq $2, {{.*}}
+; CHECK-NEXT: shrq $62, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+ %shl = shl i64 %a, 2
+ %shr = lshr i64 %b, 62
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+;uint64_t lshift7(uint64_t a, uint64_t b)
+;{
+; return (a << 7) | (b >> 57);
+;}
+
+; CHECK: lshift7:
+; CHECK: shlq $7, {{.*}}
+; CHECK-NEXT: shrq $57, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift7(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+ %shl = shl i64 %a, 7
+ %shr = lshr i64 %b, 57
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+;uint64_t lshift63(uint64_t a, uint64_t b)
+;{
+; return (a << 63) | (b >> 1);
+;}
+
+; CHECK: lshift63:
+; CHECK: shlq $63, {{.*}}
+; CHECK-NEXT: shrq {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift63(i64 %a, i64 %b) nounwind readnone uwtable {
+entry:
+ %shl = shl i64 %a, 63
+ %shr = lshr i64 %b, 1
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
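
For reference, the expressions tested above are the scalar form of a 64-bit double-precision left shift: lshiftN(a, b) is the high 64 bits of the 128-bit concatenation a:b shifted left by N, which is the value a single shld would produce. The C sketch below is not part of the patch; the helper name lshift_ref is made up for illustration, and it assumes a 64-bit compiler with __int128 support.

    #include <assert.h>
    #include <stdint.h>

    /* Reference: high 64 bits of (a:b) << n, valid for n in [1, 63]. */
    static uint64_t lshift_ref(uint64_t a, uint64_t b, unsigned n) {
        unsigned __int128 wide = ((unsigned __int128)a << 64) | b;
        return (uint64_t)((wide << n) >> 64);
    }

    /* Same expressions as lshift1/lshift7 in the test above. */
    static uint64_t lshift1(uint64_t a, uint64_t b) { return (a << 1) | (b >> 63); }
    static uint64_t lshift7(uint64_t a, uint64_t b) { return (a << 7) | (b >> 57); }

    int main(void) {
        uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
        assert(lshift1(a, b) == lshift_ref(a, b, 1));
        assert(lshift7(a, b) == lshift_ref(a, b, 7));
        return 0;
    }
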
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
new file mode 100644
index 00000000000..5edaad89df4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; Verify that, for architectures known to have high-latency double-precision
+; shift instructions, we generate an alternative sequence of lower-latency
+; instructions instead of a shrd instruction.
+
+;uint64_t rshift1(uint64_t a, uint64_t b)
+;{
+; return (a >> 1) | (b << 63);
+;}
+
+; CHECK: rshift1:
+; CHECK: shrq {{.*}}
+; CHECK-NEXT: shlq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
+ %1 = lshr i64 %a, 1
+ %2 = shl i64 %b, 63
+ %3 = or i64 %2, %1
+ ret i64 %3
+}
+
+;uint64_t rshift2(uint64_t a, uint64_t b)
+;{
+; return (a >> 2) | (b << 62);
+;}
+
+; CHECK: rshift2:
+; CHECK: shrq $2, {{.*}}
+; CHECK-NEXT: shlq $62, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
+ %1 = lshr i64 %a, 2
+ %2 = shl i64 %b, 62
+ %3 = or i64 %2, %1
+ ret i64 %3
+}
+
+;uint64_t rshift7(uint64_t a, uint64_t b)
+;{
+; return (a >> 7) | (b << 57);
+;}
+
+; CHECK: rshift7:
+; CHECK: shrq $7, {{.*}}
+; CHECK-NEXT: shlq $57, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
+ %1 = lshr i64 %a, 7
+ %2 = shl i64 %b, 57
+ %3 = or i64 %2, %1
+ ret i64 %3
+}
+
+;uint64_t rshift63(uint64_t a, uint64_t b)
+;{
+; return (a >> 63) | (b << 1);
+;}
+
+; CHECK: rshift63:
+; CHECK: shrq $63, {{.*}}
+; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+; CHECK-NEXT: orq {{.*}}, {{.*}}
+
+define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
+ %1 = lshr i64 %a, 63
+ %2 = shl i64 %b, 1
+ %3 = or i64 %2, %1
+ ret i64 %3
+}
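
Similarly for the right-shift tests above: rshiftN(a, b) is the low 64 bits of the 128-bit value b:a shifted right by N, the value a single shrd would produce. A minimal sketch under the same assumptions (the helper name rshift_ref is illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* Reference: low 64 bits of (b:a) >> n, valid for n in [1, 63]. */
    static uint64_t rshift_ref(uint64_t a, uint64_t b, unsigned n) {
        unsigned __int128 wide = ((unsigned __int128)b << 64) | a;
        return (uint64_t)(wide >> n);
    }

    /* Same expression as rshift2 in the test above. */
    static uint64_t rshift2(uint64_t a, uint64_t b) { return (a >> 2) | (b << 62); }

    int main(void) {
        uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
        assert(rshift2(a, b) == rshift_ref(a, b, 2));
        return 0;
    }
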
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
new file mode 100644
index 00000000000..5d7a10b5901
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+
+; clang -Oz -c test1.cpp -emit-llvm -S -o
+; Verify that we generate a shld instruction when we are optimizing for size,
+; even for X86_64 processors that are known to have high-latency
+; double-precision shift instructions.
+; uint64_t lshift10(uint64_t a, uint64_t b)
+; {
+; return (a << 10) | (b >> 54);
+; }
+
+; Function Attrs: minsize nounwind optsize readnone uwtable
+define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
+entry:
+; CHECK: shldq $10
+ %shl = shl i64 %a, 10
+ %shr = lshr i64 %b, 54
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+attributes #0 = { minsize nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+; clang -Os -c test2.cpp -emit-llvm -S
+; Verify that we generate a shld instruction when we are optimizing for size,
+; even for X86_64 processors that are known to have high-latency
+; double-precision shift instructions.
+; uint64_t lshift11(uint64_t a, uint64_t b)
+; {
+; return (a << 11) | (b >> 53);
+; }
+
+; Function Attrs: nounwind optsize readnone uwtable
+define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 {
+entry:
+; CHECK: shldq $11
+ %shl = shl i64 %a, 11
+ %shr = lshr i64 %b, 53
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; clang -O2 -c test2.cpp -emit-llvm -S
+; Verify that we do not generate a shld instruction when we are not optimizing
+; for size, for X86_64 processors that are known to have high-latency
+; double-precision shift instructions.
+; uint64_t lshift12(uint64_t a, uint64_t b)
+; {
+; return (a << 12) | (b >> 52);
+; }
+
+; Function Attrs: nounwind readnone uwtable
+define i64 @_Z8lshift12mm(i64 %a, i64 %b) #2 {
+entry:
+; CHECK: shlq $12
+; CHECK-NEXT: shrq $52
+ %shl = shl i64 %a, 12
+ %shr = lshr i64 %b, 52
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+attributes #2 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
new file mode 100644
index 00000000000..5bab434ae6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=x86-64 -mcpu=athlon | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-tbird | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-4 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-xp | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-mp | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=k8 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=opteron | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon-fx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=k8-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=opteron-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=athlon64-sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=amdfam10 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=btver1 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s
+
+; Verify that, for X86_64 processors known to have high-latency
+; double-precision shift instructions, we do not generate 'shld' or 'shrd'
+; instructions.
+
+;uint64_t lshift(uint64_t a, uint64_t b, int c)
+;{
+; return (a << c) | (b >> (64-c));
+;}
+
+define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
+entry:
+; CHECK-NOT: shld
+ %sh_prom = zext i32 %c to i64
+ %shl = shl i64 %a, %sh_prom
+ %sub = sub nsw i32 64, %c
+ %sh_prom1 = zext i32 %sub to i64
+ %shr = lshr i64 %b, %sh_prom1
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+;uint64_t rshift(uint64_t a, uint64_t b, int c)
+;{
+; return (a >> c) | (b << (64-c));
+;}
+
+define i64 @rshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
+entry:
+; CHECK-NOT: shrd
+ %sh_prom = zext i32 %c to i64
+ %shr = lshr i64 %a, %sh_prom
+ %sub = sub nsw i32 64, %c
+ %sh_prom1 = zext i32 %sub to i64
+ %shl = shl i64 %b, %sh_prom1
+ %or = or i64 %shl, %shr
+ ret i64 %or
+}
+
+
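
The variable-count case above is the same funnel shift with a runtime count. Note that the C expression in the comment, (a << c) | (b >> (64 - c)), is only well defined for c in [1, 63], since a shift by 64 is undefined behaviour in C; a 128-bit reference makes the intent explicit. A minimal sketch, again assuming __int128 support and using illustrative names:

    #include <assert.h>
    #include <stdint.h>

    /* Reference: high 64 bits of (a:b) << c, for c in [1, 63]. */
    static uint64_t lshift_var_ref(uint64_t a, uint64_t b, unsigned c) {
        unsigned __int128 wide = ((unsigned __int128)a << 64) | b;
        return (uint64_t)((wide << c) >> 64);
    }

    /* Same expression as @lshift in the test above. */
    static uint64_t lshift_var(uint64_t a, uint64_t b, unsigned c) {
        return (a << c) | (b >> (64 - c));
    }

    int main(void) {
        uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
        for (unsigned c = 1; c < 64; ++c)
            assert(lshift_var(a, b, c) == lshift_var_ref(a, b, c));
        return 0;
    }
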