summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 28
-rw-r--r-- llvm/test/CodeGen/X86/extract-store.ll 48
-rw-r--r-- llvm/test/CodeGen/X86/extractelement-index.ll 24
3 files changed, 54 insertions, 46 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67bf864ddc8..95bba1a5773 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13935,7 +13935,33 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- // TODO: handle v16i8.
+ // TODO: We only extract a single element from v16i8; we can probably afford
+ // to be more aggressive here before using the default approach of spilling to
+ // the stack.
+ if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
+ // Extract either the lowest i32 or any i16, and extract the sub-byte.
+ int DWordIdx = IdxVal / 4;
+ if (DWordIdx == 0) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Vec),
+ DAG.getIntPtrConstant(DWordIdx, dl));
+ int ShiftVal = (IdxVal % 4) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
+
+ int WordIdx = IdxVal / 2;
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ DAG.getBitcast(MVT::v8i16, Vec),
+ DAG.getIntPtrConstant(WordIdx, dl));
+ int ShiftVal = (IdxVal % 2) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i16));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll
index fda56f94000..1751f03731d 100644
--- a/llvm/test/CodeGen/X86/extract-store.ll
+++ b/llvm/test/CodeGen/X86/extract-store.ll
@@ -9,22 +9,14 @@
define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; SSE2-X32-LABEL: extract_i8_0:
; SSE2-X32: # BB#0:
-; SSE2-X32-NEXT: pushl %ebp
-; SSE2-X32-NEXT: movl %esp, %ebp
-; SSE2-X32-NEXT: andl $-16, %esp
-; SSE2-X32-NEXT: subl $32, %esp
-; SSE2-X32-NEXT: movl 8(%ebp), %eax
-; SSE2-X32-NEXT: movaps %xmm0, (%esp)
-; SSE2-X32-NEXT: movb (%esp), %cl
+; SSE2-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2-X32-NEXT: movd %xmm0, %ecx
; SSE2-X32-NEXT: movb %cl, (%eax)
-; SSE2-X32-NEXT: movl %ebp, %esp
-; SSE2-X32-NEXT: popl %ebp
; SSE2-X32-NEXT: retl
;
; SSE2-X64-LABEL: extract_i8_0:
; SSE2-X64: # BB#0:
-; SSE2-X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-X64-NEXT: movd %xmm0, %eax
; SSE2-X64-NEXT: movb %al, (%rdi)
; SSE2-X64-NEXT: retq
;
@@ -57,22 +49,16 @@ define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind {
define void @extract_i8_3(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; SSE2-X32-LABEL: extract_i8_3:
; SSE2-X32: # BB#0:
-; SSE2-X32-NEXT: pushl %ebp
-; SSE2-X32-NEXT: movl %esp, %ebp
-; SSE2-X32-NEXT: andl $-16, %esp
-; SSE2-X32-NEXT: subl $32, %esp
-; SSE2-X32-NEXT: movl 8(%ebp), %eax
-; SSE2-X32-NEXT: movaps %xmm0, (%esp)
-; SSE2-X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; SSE2-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2-X32-NEXT: movd %xmm0, %ecx
+; SSE2-X32-NEXT: shrl $24, %ecx
; SSE2-X32-NEXT: movb %cl, (%eax)
-; SSE2-X32-NEXT: movl %ebp, %esp
-; SSE2-X32-NEXT: popl %ebp
; SSE2-X32-NEXT: retl
;
; SSE2-X64-LABEL: extract_i8_3:
; SSE2-X64: # BB#0:
-; SSE2-X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-X64-NEXT: movd %xmm0, %eax
+; SSE2-X64-NEXT: shrl $24, %eax
; SSE2-X64-NEXT: movb %al, (%rdi)
; SSE2-X64-NEXT: retq
;
@@ -105,23 +91,15 @@ define void @extract_i8_3(i8* nocapture %dst, <16 x i8> %foo) nounwind {
define void @extract_i8_15(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; SSE2-X32-LABEL: extract_i8_15:
; SSE2-X32: # BB#0:
-; SSE2-X32-NEXT: pushl %ebp
-; SSE2-X32-NEXT: movl %esp, %ebp
-; SSE2-X32-NEXT: andl $-16, %esp
-; SSE2-X32-NEXT: subl $32, %esp
-; SSE2-X32-NEXT: movl 8(%ebp), %eax
-; SSE2-X32-NEXT: movaps %xmm0, (%esp)
-; SSE2-X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; SSE2-X32-NEXT: movb %cl, (%eax)
-; SSE2-X32-NEXT: movl %ebp, %esp
-; SSE2-X32-NEXT: popl %ebp
+; SSE2-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2-X32-NEXT: pextrw $7, %xmm0, %ecx
+; SSE2-X32-NEXT: movb %ch, (%eax)
; SSE2-X32-NEXT: retl
;
; SSE2-X64-LABEL: extract_i8_15:
; SSE2-X64: # BB#0:
-; SSE2-X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-X64-NEXT: movb %al, (%rdi)
+; SSE2-X64-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-X64-NEXT: movb %ah, (%rdi) # NOREX
; SSE2-X64-NEXT: retq
;
; SSE41-X32-LABEL: extract_i8_15:
diff --git a/llvm/test/CodeGen/X86/extractelement-index.ll b/llvm/test/CodeGen/X86/extractelement-index.ll
index 157e42b60a3..e36e33ffe66 100644
--- a/llvm/test/CodeGen/X86/extractelement-index.ll
+++ b/llvm/test/CodeGen/X86/extractelement-index.ll
@@ -11,8 +11,9 @@
define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind {
; SSE2-LABEL: extractelement_v16i8_1:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: shrl $8, %eax
+; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSE41-LABEL: extractelement_v16i8_1:
@@ -33,8 +34,9 @@ define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind {
define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind {
; SSE2-LABEL: extractelement_v16i8_11:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: shrl $8, %eax
+; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSE41-LABEL: extractelement_v16i8_11:
@@ -55,8 +57,8 @@ define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind {
define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind {
; SSE2-LABEL: extractelement_v16i8_14:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSE41-LABEL: extractelement_v16i8_14:
@@ -77,8 +79,9 @@ define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind {
define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind {
; SSE2-LABEL: extractelement_v32i8_1:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: shrl $8, %eax
+; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSE41-LABEL: extractelement_v32i8_1:
@@ -100,8 +103,9 @@ define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind {
define i8 @extractelement_v32i8_17(<32 x i8> %a) nounwind {
; SSE2-LABEL: extractelement_v32i8_17:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: shrl $8, %eax
+; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSE41-LABEL: extractelement_v32i8_17:
OpenPOWER on IntegriCloud