author     Simon Pilgrim <llvm-dev@redking.me.uk>  2015-07-12 11:15:19 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>  2015-07-12 11:15:19 +0000
commit     64cc4ad0a273fec56debf406c8524d1122d249b9 (patch)
tree       35e69efafa6d2c3ac1d74bdfa9898949f5fcba34 /llvm/test/Analysis
parent     d08eca0181f0d1d21fd7f35fde62eccb509cf5c5 (diff)
[X86][SSE] Vectorized v4i32 non-uniform shifts.
While the v4i32 shl operation is already vectorized using a cvttps2dq/pmulld pattern, the lshr/ashr operations are still scalarized. This patch adds vectorization support for non-uniform v4i32 shift operations - it splats constant shift amounts so they can use the immediate SSE shift instructions, or extracts/zero-extends non-constant shift amounts. The individual results are then blended together.

Differential Revision: http://reviews.llvm.org/D11063

llvm-svn: 241989
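As an illustrative sketch (not part of the patch; the function and value names are made up), the kind of IR affected is a v4i32 shift whose per-lane amounts differ:

define <4 x i32> @lshr_v4i32_nonuniform(<4 x i32> %a, <4 x i32> %amt) {
entry:
  ; Each lane of %amt may hold a different count.  On SSE2 this was
  ; previously scalarized (cost 40, scalar shrl %cl in codegen); with
  ; this patch it is lowered with per-lane shifts plus blends
  ; (cost 16, psrld), as the test updates below show.
  %r = lshr <4 x i32> %a, %amt
  ret <4 x i32> %r
}

An ashr version behaves the same way, with sarl %cl replaced by psrad and the same cost change.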
Diffstat (limited to 'llvm/test/Analysis')
-rw-r--r--  llvm/test/Analysis/CostModel/X86/testshiftashr.ll | 24
-rw-r--r--  llvm/test/Analysis/CostModel/X86/testshiftlshr.ll | 24
2 files changed, 24 insertions, 24 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
index ebb06cc3bba..da4e7d466e2 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -17,9 +17,9 @@ entry:
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
entry:
; SSE2: shift4i16
- ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2: cost of 16 {{.*}} ashr
; SSE2-CODEGEN: shift4i16
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype4i16 %a , %b
ret %shifttype4i16 %0
@@ -77,9 +77,9 @@ entry:
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
entry:
; SSE2: shift4i32
- ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2: cost of 16 {{.*}} ashr
; SSE2-CODEGEN: shift4i32
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype4i32 %a , %b
ret %shifttype4i32 %0
@@ -89,9 +89,9 @@ entry:
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
entry:
; SSE2: shift8i32
- ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2: cost of 32 {{.*}} ashr
; SSE2-CODEGEN: shift8i32
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype8i32 %a , %b
ret %shifttype8i32 %0
@@ -101,9 +101,9 @@ entry:
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
entry:
; SSE2: shift16i32
- ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2: cost of 64 {{.*}} ashr
; SSE2-CODEGEN: shift16i32
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype16i32 %a , %b
ret %shifttype16i32 %0
@@ -113,9 +113,9 @@ entry:
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
; SSE2: shift32i32
- ; SSE2: cost of 320 {{.*}} ashr
+ ; SSE2: cost of 128 {{.*}} ashr
; SSE2-CODEGEN: shift32i32
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype32i32 %a , %b
ret %shifttype32i32 %0
@@ -197,9 +197,9 @@ entry:
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
entry:
; SSE2: shift4i8
- ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2: cost of 16 {{.*}} ashr
; SSE2-CODEGEN: shift4i8
- ; SSE2-CODEGEN: sarl %cl
+ ; SSE2-CODEGEN: psrad
%0 = ashr %shifttype4i8 %a , %b
ret %shifttype4i8 %0
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
index 0bc60eacac9..5775a42d08a 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -17,9 +17,9 @@ entry:
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
entry:
; SSE2: shift4i16
- ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i16
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype4i16 %a , %b
ret %shifttype4i16 %0
@@ -77,9 +77,9 @@ entry:
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
entry:
; SSE2: shift4i32
- ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i32
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype4i32 %a , %b
ret %shifttype4i32 %0
@@ -89,9 +89,9 @@ entry:
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
entry:
; SSE2: shift8i32
- ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift8i32
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype8i32 %a , %b
ret %shifttype8i32 %0
@@ -101,9 +101,9 @@ entry:
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
entry:
; SSE2: shift16i32
- ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift16i32
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype16i32 %a , %b
ret %shifttype16i32 %0
@@ -113,9 +113,9 @@ entry:
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
; SSE2: shift32i32
- ; SSE2: cost of 320 {{.*}} lshr
+ ; SSE2: cost of 128 {{.*}} lshr
; SSE2-CODEGEN: shift32i32
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype32i32 %a , %b
ret %shifttype32i32 %0
@@ -197,9 +197,9 @@ entry:
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
entry:
; SSE2: shift4i8
- ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i8
- ; SSE2-CODEGEN: shrl %cl
+ ; SSE2-CODEGEN: psrld
%0 = lshr %shifttype4i8 %a , %b
ret %shifttype4i8 %0
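As a rough cross-check of the new cost figures (inferred from the numbers in this diff; that SSE2 legalizes the wider types by splitting them into <4 x i32> parts is an assumption, not stated in the patch), the wider vector costs are simple multiples of the v4i32 cost:

  cost(v4i32  lshr/ashr) = 16
  cost(v8i32)            = 2 * 16 = 32
  cost(v16i32)           = 4 * 16 = 64
  cost(v32i32)           = 8 * 16 = 128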