summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorNadav Rotem <nadav.rotem@intel.com>2011-10-30 13:24:22 +0000
committerNadav Rotem <nadav.rotem@intel.com>2011-10-30 13:24:22 +0000
commitc602b2c4de771b1996a7a9d34f0e933b728b6cab (patch)
treeb0f20d8b6a2b8b50bb4db33c003e0fe9c8efa622 /llvm/test/CodeGen
parent921b9f52f46429afd953c2481d60f4d85e3a87db (diff)
downloadbcm5719-llvm-c602b2c4de771b1996a7a9d34f0e933b728b6cab.tar.gz
bcm5719-llvm-c602b2c4de771b1996a7a9d34f0e933b728b6cab.zip
Fix pr11266.
On x86: (shl V, 1) -> add V,V Hardware support for vector-shift is sparse and in many cases we scalarize the result. Additionally, on sandybridge padd is faster than shl. llvm-svn: 143311
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/2011-10-30-padd.ll20
-rw-r--r--llvm/test/CodeGen/X86/x86-shifts.ll16
2 files changed, 34 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/2011-10-30-padd.ll b/llvm/test/CodeGen/X86/2011-10-30-padd.ll
new file mode 100644
index 00000000000..180ca15a0ee
--- /dev/null
+++ b/llvm/test/CodeGen/X86/2011-10-30-padd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+
+;CHECK: addXX_test
+;CHECK: padd
+;CHECK: ret
+
+
+define <16 x i8> @addXX_test(<16 x i8> %a) {
+ %b = add <16 x i8> %a, %a
+ ret <16 x i8> %b
+}
+
+;CHECK: instcombine_test
+;CHECK: padd
+;CHECK: ret
+define <16 x i8> @instcombine_test(<16 x i8> %a) {
+ %b = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %b
+}
+
diff --git a/llvm/test/CodeGen/X86/x86-shifts.ll b/llvm/test/CodeGen/X86/x86-shifts.ll
index 1cb07aa0824..5a91b090472 100644
--- a/llvm/test/CodeGen/X86/x86-shifts.ll
+++ b/llvm/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,9 @@
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK: shl4
+; CHECK: padd
; CHECK: pslld
-; CHECK-NEXT: pslld
+; CHECK: ret
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -19,6 +20,7 @@ entry:
; CHECK: shr4
; CHECK: psrld
; CHECK-NEXT: psrld
+; CHECK: ret
%B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -30,6 +32,7 @@ entry:
; CHECK: sra4
; CHECK: psrad
; CHECK-NEXT: psrad
+; CHECK: ret
%B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -41,6 +44,7 @@ entry:
; CHECK: shl2
; CHECK: psllq
; CHECK-NEXT: psllq
+; CHECK: ret
%B = shl <2 x i64> %A, < i64 2, i64 2>
%C = shl <2 x i64> %A, < i64 9, i64 9>
%K = xor <2 x i64> %B, %C
@@ -52,6 +56,7 @@ entry:
; CHECK: shr2
; CHECK: psrlq
; CHECK-NEXT: psrlq
+; CHECK: ret
%B = lshr <2 x i64> %A, < i64 8, i64 8>
%C = lshr <2 x i64> %A, < i64 1, i64 1>
%K = xor <2 x i64> %B, %C
@@ -62,8 +67,9 @@ entry:
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK: shl8
+; CHECK: padd
; CHECK: psllw
-; CHECK-NEXT: psllw
+; CHECK: ret
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -75,6 +81,7 @@ entry:
; CHECK: shr8
; CHECK: psrlw
; CHECK-NEXT: psrlw
+; CHECK: ret
%B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -86,6 +93,7 @@ entry:
; CHECK: sra8
; CHECK: psraw
; CHECK-NEXT: psraw
+; CHECK: ret
%B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -100,6 +108,7 @@ entry:
; CHECK: sll8_nosplat
; CHECK-NOT: psll
; CHECK-NOT: psll
+; CHECK: ret
%B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -112,6 +121,7 @@ entry:
; CHECK: shr2_nosplat
; CHECK-NOT: psrlq
; CHECK-NOT: psrlq
+; CHECK: ret
%B = lshr <2 x i64> %A, < i64 8, i64 1>
%C = lshr <2 x i64> %A, < i64 1, i64 0>
%K = xor <2 x i64> %B, %C
@@ -125,6 +135,7 @@ define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shl2_other
; CHECK: psllq
+; CHECK: ret
%B = shl <2 x i32> %A, < i32 2, i32 2>
%C = shl <2 x i32> %A, < i32 9, i32 9>
%K = xor <2 x i32> %B, %C
@@ -135,6 +146,7 @@ define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shr2_other
; CHECK: psrlq
+; CHECK: ret
%B = lshr <2 x i32> %A, < i32 8, i32 8>
%C = lshr <2 x i32> %A, < i32 1, i32 1>
%K = xor <2 x i32> %B, %C
OpenPOWER on IntegriCloud