summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp3
-rw-r--r--llvm/test/CodeGen/X86/vector-sext.ll56
2 files changed, 58 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 23a302f3e56..ab36bc1417a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14567,7 +14567,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
+ !ExtIdx)
return SDValue();
// The narrow load will be offset from the base address of the old load if
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 8cc1d8c765a..53e471d6f17 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1749,6 +1749,62 @@ entry:
ret <4 x i64> %Y
}
+define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbq 3(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm1
+; SSE2-NEXT: movsbq 2(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i8_to_4i64_extract:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbq 3(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm1
+; SSSE3-NEXT: movsbq 2(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0
+; X32-SSE41-NEXT: retl
+ %ld = load <4 x i8>, <4 x i8>* %ptr
+ %sext = sext <4 x i8> %ld to <4 x i64>
+ %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x i64> %extract
+}
+
define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-LABEL: load_sext_8i1_to_8i16:
; SSE2: # BB#0: # %entry
OpenPOWER on IntegriCloud