[X86][SSE] Vectorized v4i32 non-uniform shifts.

While the v4i32 shl operation is already vectorized using a cvttps2dq/pmulld pattern, the lshr/ashr operations are still scalarized. This patch adds vectorization support for non-uniform v4i32 shift operations - it splats constant shift amounts to allow them to use the immediate SSE shift instructions, or extracts/zero-extends non-constant shift amounts. The individual results are then blended together. Differential Revision: http://reviews.llvm.org/D11063 llvm-svn: 241989
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-07-12 11:15:19 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-07-12 11:15:19 +0000
commit: 64cc4ad0a273fec56debf406c8524d1122d249b9 (patch)
tree: 35e69efafa6d2c3ac1d74bdfa9898949f5fcba34 /llvm/test/Analysis
parent: d08eca0181f0d1d21fd7f35fde62eccb509cf5c5 (diff)
download: bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.tar.gz
bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.zip
2 files changed, 24 insertions, 24 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
index ebb06cc3bba..da4e7d466e2 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -17,9 +17,9 @@ entry:
 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
 entry:
   ; SSE2: shift4i16
-  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2: cost of 16 {{.*}} ashr
   ; SSE2-CODEGEN: shift4i16
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype4i16 %a , %b
   ret %shifttype4i16 %0
@@ -77,9 +77,9 @@ entry:
 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
 entry:
   ; SSE2: shift4i32
-  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2: cost of 16 {{.*}} ashr
   ; SSE2-CODEGEN: shift4i32
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype4i32 %a , %b
   ret %shifttype4i32 %0
@@ -89,9 +89,9 @@ entry:
 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
 entry:
   ; SSE2: shift8i32
-  ; SSE2: cost of 80 {{.*}} ashr
+  ; SSE2: cost of 32 {{.*}} ashr
   ; SSE2-CODEGEN: shift8i32
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype8i32 %a , %b
   ret %shifttype8i32 %0
@@ -101,9 +101,9 @@ entry:
 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
 entry:
   ; SSE2: shift16i32
-  ; SSE2: cost of 160 {{.*}} ashr
+  ; SSE2: cost of 64 {{.*}} ashr
   ; SSE2-CODEGEN: shift16i32
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype16i32 %a , %b
   ret %shifttype16i32 %0
@@ -113,9 +113,9 @@ entry:
 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
 entry:
   ; SSE2: shift32i32
-  ; SSE2: cost of 320 {{.*}} ashr
+  ; SSE2: cost of 128 {{.*}} ashr
   ; SSE2-CODEGEN: shift32i32
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype32i32 %a , %b
   ret %shifttype32i32 %0
@@ -197,9 +197,9 @@ entry:
 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
 entry:
   ; SSE2: shift4i8
-  ; SSE2: cost of 40 {{.*}} ashr
+  ; SSE2: cost of 16 {{.*}} ashr
   ; SSE2-CODEGEN: shift4i8
-  ; SSE2-CODEGEN: sarl %cl
+  ; SSE2-CODEGEN: psrad
 
   %0 = ashr %shifttype4i8 %a , %b
   ret %shifttype4i8 %0
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
index 0bc60eacac9..5775a42d08a 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -17,9 +17,9 @@ entry:
 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
 entry:
   ; SSE2: shift4i16
-  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2: cost of 16 {{.*}} lshr
   ; SSE2-CODEGEN: shift4i16
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype4i16 %a , %b
   ret %shifttype4i16 %0
@@ -77,9 +77,9 @@ entry:
 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
 entry:
   ; SSE2: shift4i32
-  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2: cost of 16 {{.*}} lshr
   ; SSE2-CODEGEN: shift4i32
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype4i32 %a , %b
   ret %shifttype4i32 %0
@@ -89,9 +89,9 @@ entry:
 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
 entry:
   ; SSE2: shift8i32
-  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2: cost of 32 {{.*}} lshr
   ; SSE2-CODEGEN: shift8i32
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype8i32 %a , %b
   ret %shifttype8i32 %0
@@ -101,9 +101,9 @@ entry:
 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
 entry:
   ; SSE2: shift16i32
-  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2: cost of 64 {{.*}} lshr
   ; SSE2-CODEGEN: shift16i32
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype16i32 %a , %b
   ret %shifttype16i32 %0
@@ -113,9 +113,9 @@ entry:
 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
 entry:
   ; SSE2: shift32i32
-  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2: cost of 128 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i32
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype32i32 %a , %b
   ret %shifttype32i32 %0
@@ -197,9 +197,9 @@ entry:
 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
 entry:
   ; SSE2: shift4i8
-  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2: cost of 16 {{.*}} lshr
   ; SSE2-CODEGEN: shift4i8
-  ; SSE2-CODEGEN: shrl %cl
+  ; SSE2-CODEGEN: psrld
 
   %0 = lshr %shifttype4i8 %a , %b
   ret %shifttype4i8 %0
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2015-07-12 11:15:19 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2015-07-12 11:15:19 +0000
commit	64cc4ad0a273fec56debf406c8524d1122d249b9 (patch)
tree	35e69efafa6d2c3ac1d74bdfa9898949f5fcba34 /llvm/test/Analysis
parent	d08eca0181f0d1d21fd7f35fde62eccb509cf5c5 (diff)
download	bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.tar.gz bcm5719-llvm-64cc4ad0a273fec56debf406c8524d1122d249b9.zip