summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-21 20:19:48 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-21 20:19:48 +0000
commit 3cb024490a2087c1c8fed1716333a188112bd174 (patch)
tree 90c1f7d268599e0fa0a8d01904b829f8b9667a73 /llvm
parent ca2382d8097b5cee4dfbeb30f7b791aa242f313a (diff)
download bcm5719-llvm-3cb024490a2087c1c8fed1716333a188112bd174.tar.gz
bcm5719-llvm-3cb024490a2087c1c8fed1716333a188112bd174.zip
[X86][SSE] Add extractps/pextrd equivalence to domain tables
Differential Revision: https://reviews.llvm.org/D39135
llvm-svn: 316274
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp | 6
-rw-r--r-- llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll | 6
-rw-r--r-- llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll | 6
-rw-r--r-- llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/avx512-insert-extract.ll | 16
-rw-r--r-- llvm/test/CodeGen/X86/extract-store.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/extractelement-index.ll | 25
-rw-r--r-- llvm/test/CodeGen/X86/known-signbits-vector.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/nontemporal-2.ll | 6
-rw-r--r-- llvm/test/CodeGen/X86/oddshuffles.ll | 22
-rw-r--r-- llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/sse41-schedule.ll | 18
-rw-r--r-- llvm/test/CodeGen/X86/sse41.ll | 7
-rw-r--r-- llvm/test/CodeGen/X86/widen_load-3.ll | 40
14 files changed, 87 insertions, 91 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ae4d123e4cf..9a64c357f02 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9451,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
{ X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
+ { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
// AVX 128-bit support
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@@ -9479,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
{ X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
{ X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
+ { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
+ { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
// AVX 256-bit support
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
@@ -9577,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
{ X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
{ X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
+ { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
+ { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index 997d2fd1ae7..416761ffef4 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -49,9 +49,9 @@ entry:
define void @zero_test() {
; X32-LABEL: zero_test:
; X32: # BB#0: # %entry
-; X32-NEXT: pxor %xmm0, %xmm0
-; X32-NEXT: pextrd $1, %xmm0, (%eax)
-; X32-NEXT: movd %xmm0, (%eax)
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: extractps $1, %xmm0, (%eax)
+; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: zero_test:
diff --git a/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
index 3439ebcf9de..c87b04485e4 100644
--- a/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ b/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -10,12 +10,12 @@
define <4 x i32> @test(<4 x i32>* %p) {
; CHECK-LABEL: test:
; CHECK: # BB#0:
-; CHECK-NEXT: movdqa (%rdi), %xmm0
-; CHECK-NEXT: pextrd $2, %xmm0, %eax
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: extractps $2, %xmm0, %eax
; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index e50f9396217..1ae93dc747f 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -792,14 +792,14 @@ define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi32:
; X32: # BB#0:
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vpextrd $1, %xmm0, %eax
+; X32-NEXT: vextractps $1, %xmm0, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_extract_epi32:
; X64: # BB#0:
; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vpextrd $1, %xmm0, %eax
+; X64-NEXT: vextractps $1, %xmm0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
@@ -811,8 +811,8 @@ define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi64:
; X32: # BB#0:
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vpextrd $2, %xmm0, %eax
-; X32-NEXT: vpextrd $3, %xmm0, %edx
+; X32-NEXT: vextractps $2, %xmm0, %eax
+; X32-NEXT: vextractps $3, %xmm0, %edx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 7db93c9e2db..bff09d6b000 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -424,9 +424,9 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v16i32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <16 x i32> %x, i32 1
@@ -438,9 +438,9 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v8i32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <8 x i32> %x, i32 1
@@ -452,8 +452,8 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v4i32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vpextrd $3, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT: retq
%r1 = extractelement <4 x i32> %x, i32 1
%r2 = extractelement <4 x i32> %x, i32 3
diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll
index e39f3f170a2..225d2e9a107 100644
--- a/llvm/test/CodeGen/X86/extract-store.ll
+++ b/llvm/test/CodeGen/X86/extract-store.ll
@@ -285,23 +285,23 @@ define void @extract_i32_3(i32* nocapture %dst, <4 x i32> %foo) nounwind {
; SSE41-X32-LABEL: extract_i32_3:
; SSE41-X32: # BB#0:
; SSE41-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE41-X32-NEXT: pextrd $3, %xmm0, (%eax)
+; SSE41-X32-NEXT: extractps $3, %xmm0, (%eax)
; SSE41-X32-NEXT: retl
;
; SSE41-X64-LABEL: extract_i32_3:
; SSE41-X64: # BB#0:
-; SSE41-X64-NEXT: pextrd $3, %xmm0, (%rdi)
+; SSE41-X64-NEXT: extractps $3, %xmm0, (%rdi)
; SSE41-X64-NEXT: retq
;
; AVX-X32-LABEL: extract_i32_3:
; AVX-X32: # BB#0:
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-X32-NEXT: vpextrd $3, %xmm0, (%eax)
+; AVX-X32-NEXT: vextractps $3, %xmm0, (%eax)
; AVX-X32-NEXT: retl
;
; AVX-X64-LABEL: extract_i32_3:
; AVX-X64: # BB#0:
-; AVX-X64-NEXT: vpextrd $3, %xmm0, (%rdi)
+; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
;
; SSE-F128-LABEL: extract_i32_3:
diff --git a/llvm/test/CodeGen/X86/extractelement-index.ll b/llvm/test/CodeGen/X86/extractelement-index.ll
index 228ce70b400..8a6cdaf203c 100644
--- a/llvm/test/CodeGen/X86/extractelement-index.ll
+++ b/llvm/test/CodeGen/X86/extractelement-index.ll
@@ -231,12 +231,12 @@ define i32 @extractelement_v4i32_3(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: extractelement_v4i32_3:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX-LABEL: extractelement_v4i32_3:
; AVX: # BB#0:
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: vextractps $3, %xmm0, %eax
; AVX-NEXT: retq
%b = extractelement <4 x i32> %a, i256 3
ret i32 %b
@@ -297,22 +297,15 @@ define i32 @extractelement_v8i32_7(<8 x i32> %a) nounwind {
;
; SSE41-LABEL: extractelement_v8i32_7:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $3, %xmm1, %eax
+; SSE41-NEXT: extractps $3, %xmm1, %eax
; SSE41-NEXT: retq
;
-; AVX1-LABEL: extractelement_v8i32_7:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: extractelement_v8i32_7:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpextrd $3, %xmm0, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: extractelement_v8i32_7:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vextractps $3, %xmm0, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%b = extractelement <8 x i32> %a, i64 7
ret i32 %b
}
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index ec620b8ce87..7506062b615 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -76,7 +76,7 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp:
; X32: # BB#0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vpextrd $1, %xmm0, %eax
+; X32-NEXT: vextractps $1, %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index 9a7897edbea..b6f2314b31e 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -541,19 +541,19 @@ define void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
;
; SSE41-LABEL: test_extract_i32:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: extractps $1, %xmm0, %eax
; SSE41-NEXT: movntil %eax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_extract_i32:
; AVX: # BB#0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: movntil %eax, (%rdi)
; AVX-NEXT: retq
;
; VLX-LABEL: test_extract_i32:
; VLX: # BB#0:
-; VLX-NEXT: vpextrd $1, %xmm0, %eax
+; VLX-NEXT: vextractps $1, %xmm0, %eax
; VLX-NEXT: movntil %eax, (%rdi)
; VLX-NEXT: retq
%1 = extractelement <4 x i32> %arg, i32 1
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 573111d5d25..02a399b4898 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -112,10 +112,10 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
;
; AVX2-LABEL: v3i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm1, (%rdi)
+; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX2-NEXT: vmovlps %xmm1, (%rdi)
; AVX2-NEXT: retq
;
; XOP-LABEL: v3i32:
@@ -199,18 +199,18 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
; AVX1-NEXT: vmovaps %xmm1, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v5i32:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
-; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
+; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi)
+; AVX2-NEXT: vmovaps %xmm1, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -218,7 +218,7 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
; XOP: # BB#0:
; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi)
; XOP-NEXT: vmovaps %xmm1, (%rdi)
; XOP-NEXT: retq
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3>
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
index acf91cbeea1..b35c9766c16 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_extract_epi32:
; X32: # BB#0:
-; X32-NEXT: pextrd $1, %xmm0, %eax
+; X32-NEXT: extractps $1, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_extract_epi32:
; X64: # BB#0:
-; X64-NEXT: pextrd $1, %xmm0, %eax
+; X64-NEXT: extractps $1, %xmm0, %eax
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%ext = extractelement <4 x i32> %arg0, i32 1
@@ -455,8 +455,8 @@ define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_extract_epi64:
; X32: # BB#0:
-; X32-NEXT: pextrd $2, %xmm0, %eax
-; X32-NEXT: pextrd $3, %xmm0, %edx
+; X32-NEXT: extractps $2, %xmm0, %eax
+; X32-NEXT: extractps $3, %xmm0, %edx
; X32-NEXT: retl
;
; X64-LABEL: test_mm_extract_epi64:
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index 56f2dd4ff1c..ab163bd2e99 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -949,61 +949,71 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; GENERIC-LABEL: test_pextrd:
; GENERIC: # BB#0:
+; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pextrd:
; SLM: # BB#0:
+; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00]
; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pextrd:
; SANDY: # BB#0:
+; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pextrd:
; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BROADWELL-LABEL: test_pextrd:
; BROADWELL: # BB#0:
+; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pextrd:
; SKYLAKE: # BB#0:
+; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pextrd:
; SKX: # BB#0:
+; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pextrd:
; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pextrd:
; ZNVER1: # BB#0:
+; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25]
; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <4 x i32> %a0, i32 3
- %2 = extractelement <4 x i32> %a0, i32 1
- store i32 %2, i32 *%a1
- ret i32 %1
+ %1 = add <4 x i32> %a0, %a0
+ %2 = extractelement <4 x i32> %1, i32 3
+ %3 = extractelement <4 x i32> %1, i32 1
+ store i32 %3, i32 *%a1
+ ret i32 %2
}
define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index 9f30767b10d..98ddd6d7f13 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -108,6 +108,7 @@ define float @ext_1(<4 x float> %v) nounwind {
%t = fadd float %s, 1.0
ret float %t
}
+
define float @ext_2(<4 x float> %v) nounwind {
; X32-LABEL: ext_2:
; X32: ## BB#0:
@@ -125,15 +126,16 @@ define float @ext_2(<4 x float> %v) nounwind {
%s = extractelement <4 x float> %v, i32 3
ret float %s
}
+
define i32 @ext_3(<4 x i32> %v) nounwind {
; X32-LABEL: ext_3:
; X32: ## BB#0:
-; X32-NEXT: pextrd $3, %xmm0, %eax
+; X32-NEXT: extractps $3, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: ext_3:
; X64: ## BB#0:
-; X64-NEXT: pextrd $3, %xmm0, %eax
+; X64-NEXT: extractps $3, %xmm0, %eax
; X64-NEXT: retq
%i = extractelement <4 x i32> %v, i32 3
ret i32 %i
@@ -261,7 +263,6 @@ define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
ret i32 %tmp1
}
-
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/widen_load-3.ll b/llvm/test/CodeGen/X86/widen_load-3.ll
index 3fd618fa678..bc36c5fbd57 100644
--- a/llvm/test/CodeGen/X86/widen_load-3.ll
+++ b/llvm/test/CodeGen/X86/widen_load-3.ll
@@ -25,33 +25,19 @@ define <7 x i64> @load7_aligned(<7 x i64>* %x) {
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl $4
;
-; X86-AVX1-LABEL: load7_aligned:
-; X86-AVX1: # BB#0:
-; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX1-NEXT: vmovaps (%ecx), %ymm0
-; X86-AVX1-NEXT: vmovaps 32(%ecx), %ymm1
-; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
-; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; X86-AVX1-NEXT: vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX1-NEXT: vmovd %xmm0, 48(%eax)
-; X86-AVX1-NEXT: vmovaps %xmm1, 32(%eax)
-; X86-AVX1-NEXT: vzeroupper
-; X86-AVX1-NEXT: retl $4
-;
-; X86-AVX2-LABEL: load7_aligned:
-; X86-AVX2: # BB#0:
-; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX2-NEXT: vmovaps (%ecx), %ymm0
-; X86-AVX2-NEXT: vmovdqa 32(%ecx), %ymm1
-; X86-AVX2-NEXT: vmovaps %ymm0, (%eax)
-; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
-; X86-AVX2-NEXT: vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX2-NEXT: vmovd %xmm0, 48(%eax)
-; X86-AVX2-NEXT: vmovdqa %xmm1, 32(%eax)
-; X86-AVX2-NEXT: vzeroupper
-; X86-AVX2-NEXT: retl $4
+; X86-AVX-LABEL: load7_aligned:
+; X86-AVX: # BB#0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT: vmovaps (%ecx), %ymm0
+; X86-AVX-NEXT: vmovaps 32(%ecx), %ymm1
+; X86-AVX-NEXT: vmovaps %ymm0, (%eax)
+; X86-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
+; X86-AVX-NEXT: vextractps $1, %xmm0, 52(%eax)
+; X86-AVX-NEXT: vmovss %xmm0, 48(%eax)
+; X86-AVX-NEXT: vmovaps %xmm1, 32(%eax)
+; X86-AVX-NEXT: vzeroupper
+; X86-AVX-NEXT: retl $4
;
; X64-SSE-LABEL: load7_aligned:
; X64-SSE: # BB#0:
OpenPOWER on IntegriCloud