diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-07-12 11:15:19 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-07-12 11:15:19 +0000 |
| commit | 64cc4ad0a273fec56debf406c8524d1122d249b9 (patch) | |
| tree | 35e69efafa6d2c3ac1d74bdfa9898949f5fcba34 /llvm/test/Analysis | |
| parent | d08eca0181f0d1d21fd7f35fde62eccb509cf5c5 (diff) | |
| download | bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.tar.gz bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.zip | |
[X86][SSE] Vectorized v4i32 non-uniform shifts.
While the v4i32 shl operation is already vectorized using a cvttps2dq/pmulld pattern, the lshr/ashr operations are still scalarized.
This patch adds vectorization support for non-uniform v4i32 shift operations - it splats constant shift amounts to allow them to use the immediate SSE shift instructions, or extracts/zero-extends non-constant shift amounts. The individual results are then blended together.
Differential Revision: http://reviews.llvm.org/D11063
llvm-svn: 241989
Diffstat (limited to 'llvm/test/Analysis')
| -rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftashr.ll | 24 | ||||
| -rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftlshr.ll | 24 |
2 files changed, 24 insertions, 24 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll index ebb06cc3bba..da4e7d466e2 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll @@ -17,9 +17,9 @@ entry: define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { entry: ; SSE2: shift4i16 - ; SSE2: cost of 40 {{.*}} ashr + ; SSE2: cost of 16 {{.*}} ashr ; SSE2-CODEGEN: shift4i16 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype4i16 %a , %b ret %shifttype4i16 %0 @@ -77,9 +77,9 @@ entry: define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) { entry: ; SSE2: shift4i32 - ; SSE2: cost of 40 {{.*}} ashr + ; SSE2: cost of 16 {{.*}} ashr ; SSE2-CODEGEN: shift4i32 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype4i32 %a , %b ret %shifttype4i32 %0 @@ -89,9 +89,9 @@ entry: define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) { entry: ; SSE2: shift8i32 - ; SSE2: cost of 80 {{.*}} ashr + ; SSE2: cost of 32 {{.*}} ashr ; SSE2-CODEGEN: shift8i32 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype8i32 %a , %b ret %shifttype8i32 %0 @@ -101,9 +101,9 @@ entry: define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) { entry: ; SSE2: shift16i32 - ; SSE2: cost of 160 {{.*}} ashr + ; SSE2: cost of 64 {{.*}} ashr ; SSE2-CODEGEN: shift16i32 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype16i32 %a , %b ret %shifttype16i32 %0 @@ -113,9 +113,9 @@ entry: define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { entry: ; SSE2: shift32i32 - ; SSE2: cost of 320 {{.*}} ashr + ; SSE2: cost of 128 {{.*}} ashr ; SSE2-CODEGEN: shift32i32 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype32i32 %a , %b ret %shifttype32i32 %0 @@ -197,9 +197,9 @@ entry: define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { entry: ; 
SSE2: shift4i8 - ; SSE2: cost of 40 {{.*}} ashr + ; SSE2: cost of 16 {{.*}} ashr ; SSE2-CODEGEN: shift4i8 - ; SSE2-CODEGEN: sarl %cl + ; SSE2-CODEGEN: psrad %0 = ashr %shifttype4i8 %a , %b ret %shifttype4i8 %0 diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll index 0bc60eacac9..5775a42d08a 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -17,9 +17,9 @@ entry: define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { entry: ; SSE2: shift4i16 - ; SSE2: cost of 40 {{.*}} lshr + ; SSE2: cost of 16 {{.*}} lshr ; SSE2-CODEGEN: shift4i16 - ; SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype4i16 %a , %b ret %shifttype4i16 %0 @@ -77,9 +77,9 @@ entry: define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) { entry: ; SSE2: shift4i32 - ; SSE2: cost of 40 {{.*}} lshr + ; SSE2: cost of 16 {{.*}} lshr ; SSE2-CODEGEN: shift4i32 - ; SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype4i32 %a , %b ret %shifttype4i32 %0 @@ -89,9 +89,9 @@ entry: define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) { entry: ; SSE2: shift8i32 - ; SSE2: cost of 80 {{.*}} lshr + ; SSE2: cost of 32 {{.*}} lshr ; SSE2-CODEGEN: shift8i32 - ; SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype8i32 %a , %b ret %shifttype8i32 %0 @@ -101,9 +101,9 @@ entry: define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) { entry: ; SSE2: shift16i32 - ; SSE2: cost of 160 {{.*}} lshr + ; SSE2: cost of 64 {{.*}} lshr ; SSE2-CODEGEN: shift16i32 - ; SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype16i32 %a , %b ret %shifttype16i32 %0 @@ -113,9 +113,9 @@ entry: define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { entry: ; SSE2: shift32i32 - ; SSE2: cost of 320 {{.*}} lshr + ; SSE2: cost of 128 {{.*}} lshr ; SSE2-CODEGEN: shift32i32 - ; 
SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype32i32 %a , %b ret %shifttype32i32 %0 @@ -197,9 +197,9 @@ entry: define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { entry: ; SSE2: shift4i8 - ; SSE2: cost of 40 {{.*}} lshr + ; SSE2: cost of 16 {{.*}} lshr ; SSE2-CODEGEN: shift4i8 - ; SSE2-CODEGEN: shrl %cl + ; SSE2-CODEGEN: psrld %0 = lshr %shifttype4i8 %a , %b ret %shifttype4i8 %0 |

