diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-11 20:42:31 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-11 20:42:31 +0000 |
commit | 18debfa5b44aae8faa38c40ff0ad32e5fabb7773 (patch) | |
tree | a8014fcb561281d9c35bbb480b15b173c0f5a104 /llvm/test/CodeGen/X86/extractelement-index.ll | |
parent | 9ff5732c921ffae261784551529e82b29440cf95 (diff) | |
download | bcm5719-llvm-18debfa5b44aae8faa38c40ff0ad32e5fabb7773.tar.gz bcm5719-llvm-18debfa5b44aae8faa38c40ff0ad32e5fabb7773.zip |
[X86][SSE] Improve extraction of elements from v16i8 (pre-SSE41)
Without SSE41 (pextrb) we currently extract byte elements from a vector by spilling to stack and reloading the byte.
This patch is an initial attempt at using MOVD/PEXTRW to extract the relevant DWORD/WORD from the vector and then shift+truncate to collect the correct byte.
Extraction of multiple bytes this way would result in code bloat, but as explained in the patch we could probably afford to be more aggressive with the supported extractions before again falling back on spilling - possibly through counting the number of extracts and which DWORD/WORD they originate?
Differential Revision: https://reviews.llvm.org/D29841
llvm-svn: 297568
Diffstat (limited to 'llvm/test/CodeGen/X86/extractelement-index.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/extractelement-index.ll | 24 |
1 files changed, 14 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/X86/extractelement-index.ll b/llvm/test/CodeGen/X86/extractelement-index.ll index 157e42b60a3..e36e33ffe66 100644 --- a/llvm/test/CodeGen/X86/extractelement-index.ll +++ b/llvm/test/CodeGen/X86/extractelement-index.ll @@ -11,8 +11,9 @@ define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind { ; SSE2-LABEL: extractelement_v16i8_1: ; SSE2: # BB#0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: shrl $8, %eax +; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> ; SSE2-NEXT: retq ; ; SSE41-LABEL: extractelement_v16i8_1: @@ -33,8 +34,9 @@ define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind { define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind { ; SSE2-LABEL: extractelement_v16i8_11: ; SSE2: # BB#0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: pextrw $5, %xmm0, %eax +; SSE2-NEXT: shrl $8, %eax +; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> ; SSE2-NEXT: retq ; ; SSE41-LABEL: extractelement_v16i8_11: @@ -55,8 +57,8 @@ define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind { define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind { ; SSE2-LABEL: extractelement_v16i8_14: ; SSE2: # BB#0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: pextrw $7, %xmm0, %eax +; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> ; SSE2-NEXT: retq ; ; SSE41-LABEL: extractelement_v16i8_14: @@ -77,8 +79,9 @@ define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind { define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind { ; SSE2-LABEL: extractelement_v32i8_1: ; SSE2: # BB#0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: shrl $8, %eax +; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> ; SSE2-NEXT: retq ; ; SSE41-LABEL: extractelement_v32i8_1: @@ -100,8 +103,9 @@ define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind { define i8 @extractelement_v32i8_17(<32 x i8> %a) nounwind { ; SSE2-LABEL: extractelement_v32i8_17: ; SSE2: # BB#0: -; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: shrl $8, %eax +; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> ; SSE2-NEXT: retq ; ; SSE41-LABEL: extractelement_v32i8_17: |