summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/haddsub-undef.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2019-01-14 18:44:02 +0000
committerSanjay Patel <spatel@rotateright.com>2019-01-14 18:44:02 +0000
commitb23ff7a0e251edffab75a498978224276bc1ebd0 (patch)
tree85279606a44020e52464f2ff26f1b025f4842b2e /llvm/test/CodeGen/X86/haddsub-undef.ll
parent1b4623176447a8242acbcc509441dd2585f0815d (diff)
downloadbcm5719-llvm-b23ff7a0e251edffab75a498978224276bc1ebd0.tar.gz
bcm5719-llvm-b23ff7a0e251edffab75a498978224276bc1ebd0.zip
[x86] lower extracted add/sub to horizontal vector math
add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0 This is the integer sibling to D56011. There's an additional restriction to only do this transform in the case where we don't have extra extracts from the source vector. Without that, we can fail to match larger horizontal patterns that are more beneficial than this minimal case. An improvement to the more general h-op lowering may allow us to remove the restriction here in a follow-up. llvm-svn: 351093
Diffstat (limited to 'llvm/test/CodeGen/X86/haddsub-undef.ll')
-rw-r--r--llvm/test/CodeGen/X86/haddsub-undef.ll112
1 file changed, 37 insertions, 75 deletions
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index 3d13c8f76a2..c27be13104f 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -186,48 +186,27 @@ define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
-; SSE-SLOW-LABEL: test8_undef:
-; SSE-SLOW: # %bb.0:
-; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE-SLOW-NEXT: addss %xmm0, %xmm1
-; SSE-SLOW-NEXT: movaps %xmm0, %xmm2
-; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
-; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE-SLOW-NEXT: addss %xmm2, %xmm0
-; SSE-SLOW-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-SLOW-NEXT: movaps %xmm1, %xmm0
-; SSE-SLOW-NEXT: retq
-;
-; SSE-FAST-LABEL: test8_undef:
-; SSE-FAST: # %bb.0:
-; SSE-FAST-NEXT: movaps %xmm0, %xmm1
-; SSE-FAST-NEXT: haddps %xmm0, %xmm1
-; SSE-FAST-NEXT: movaps %xmm0, %xmm2
-; SSE-FAST-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
-; SSE-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE-FAST-NEXT: addss %xmm2, %xmm0
-; SSE-FAST-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-FAST-NEXT: movaps %xmm1, %xmm0
-; SSE-FAST-NEXT: retq
-;
-; AVX-SLOW-LABEL: test8_undef:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
-; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; AVX-SLOW-NEXT: retq
+; SSE-LABEL: test8_undef:
+; SSE: # %bb.0:
+; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX-FAST-LABEL: test8_undef:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm1
-; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX-FAST-NEXT: vaddss %xmm0, %xmm2, %xmm0
-; AVX-FAST-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX-FAST-NEXT: retq
+; AVX-LABEL: test8_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vaddss %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; AVX-NEXT: retq
%vecext = extractelement <4 x float> %a, i32 0
%vecext1 = extractelement <4 x float> %a, i32 1
%add = fadd float %vecext, %vecext1
@@ -382,40 +361,23 @@ define <16 x float> @test13_v16f32_undef(<16 x float> %a, <16 x float> %b) {
; AVX1-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX1-FAST-NEXT: retq
;
-; AVX512-SLOW-LABEL: test13_v16f32_undef:
-; AVX512-SLOW: # %bb.0:
-; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX512-SLOW-NEXT: vaddss %xmm3, %xmm2, %xmm2
-; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
-; AVX512-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-SLOW-NEXT: vaddss %xmm2, %xmm0, %xmm2
-; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
-; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512-SLOW-NEXT: retq
-;
-; AVX512-FAST-LABEL: test13_v16f32_undef:
-; AVX512-FAST: # %bb.0:
-; AVX512-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm1
-; AVX512-FAST-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-FAST-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX512-FAST-NEXT: vaddss %xmm3, %xmm2, %xmm2
-; AVX512-FAST-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
-; AVX512-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512-FAST-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-FAST-NEXT: vaddss %xmm2, %xmm0, %xmm2
-; AVX512-FAST-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; AVX512-FAST-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512-FAST-NEXT: vaddss %xmm0, %xmm2, %xmm0
-; AVX512-FAST-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512-FAST-NEXT: retq
+; AVX512-LABEL: test13_v16f32_undef:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512-NEXT: retq
%vecext = extractelement <16 x float> %a, i32 0
%vecext1 = extractelement <16 x float> %a, i32 1
%add1 = fadd float %vecext, %vecext1
OpenPOWER on IntegriCloud