summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/vec_extract-sse4.ll
diff options
context:
space:
mode:
author Evan Cheng <evan.cheng@apple.com> 2008-05-13 08:35:03 +0000
committer Evan Cheng <evan.cheng@apple.com> 2008-05-13 08:35:03 +0000
commit 1120279ae6f2a502093a0c20c1b8d5d4dfd44e4c (patch)
tree b9c9615435314d840be21c498984860c0600af65 /llvm/test/CodeGen/X86/vec_extract-sse4.ll
parent 525aa89356515867b52d584991b9e2a0db294c97 (diff)
download bcm5719-llvm-1120279ae6f2a502093a0c20c1b8d5d4dfd44e4c.tar.gz
bcm5719-llvm-1120279ae6f2a502093a0c20c1b8d5d4dfd44e4c.zip
Instead of performing a vector load, a shuffle, and then an element extract, load the element directly from the vector's address plus an offset.
Before: pshufd $1, (%rdi), %xmm0 ; movd %xmm0, %eax
After: movl 4(%rdi), %eax
llvm-svn: 51026
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_extract-sse4.ll')
-rw-r--r-- llvm/test/CodeGen/X86/vec_extract-sse4.ll 15
1 files changed, 8 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/X86/vec_extract-sse4.ll b/llvm/test/CodeGen/X86/vec_extract-sse4.ll
index 1ef5e8803ef..d6726be1db6 100644
--- a/llvm/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/llvm/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,29 +1,30 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
-; RUN: grep extractps %t | count 1
-; RUN: grep pextrd %t | count 2
-; RUN: grep pshufd %t | count 1
+; RUN: grep extractps %t | count 1
+; RUN: grep pextrd %t | count 1
+; RUN: not grep pshufd %t
+; RUN: not grep movss %t
-define void @t1(float* %R, <4 x float>* %P1) {
+define void @t1(float* %R, <4 x float>* %P1) nounwind {
%X = load <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 3
store float %tmp, float* %R
ret void
}
-define float @t2(<4 x float>* %P1) {
+define float @t2(<4 x float>* %P1) nounwind {
%X = load <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 2
ret float %tmp
}
-define void @t3(i32* %R, <4 x i32>* %P1) {
+define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
%X = load <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
store i32 %tmp, i32* %R
ret void
}
-define i32 @t4(<4 x i32>* %P1) {
+define i32 @t4(<4 x i32>* %P1) nounwind {
%X = load <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
ret i32 %tmp
OpenPOWER on IntegriCloud