diff options
author | Craig Topper <craig.topper@intel.com> | 2019-07-02 17:51:02 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-07-02 17:51:02 +0000 |
commit | cffbaa93b72b307904935c380f90d49d00c7ecdc (patch) | |
tree | a31e28a995d70ea4c8c32a9287f21210880fe0eb /llvm/test/CodeGen/X86/half.ll | |
parent | 36face4c1df75c1e4e82c3f26b0b98495af9359e (diff) | |
download | bcm5719-llvm-cffbaa93b72b307904935c380f90d49d00c7ecdc.tar.gz bcm5719-llvm-cffbaa93b72b307904935c380f90d49d00c7ecdc.zip |
[X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
Similarly for (V)MOVSD. Ultimately, I'd like to see about folding
scalar_to_vector+load to vzload, which would select as (V)MOVSSrm,
so this is closer to that.
llvm-svn: 364948
Diffstat (limited to 'llvm/test/CodeGen/X86/half.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/half.ll | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index f180bef4120..095dfa2b04a 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -431,18 +431,18 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
 ; CHECK-I686-NEXT: pushl %esi
 ; CHECK-I686-NEXT: subl $56, %esp
 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-I686-NEXT: movzwl 4(%esi), %eax
+; CHECK-I686-NEXT: movzwl 2(%esi), %eax
 ; CHECK-I686-NEXT: movl %eax, (%esp)
 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
-; CHECK-I686-NEXT: movzwl 2(%esi), %eax
+; CHECK-I686-NEXT: movzwl 4(%esi), %eax
 ; CHECK-I686-NEXT: movl %eax, (%esp)
 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
 ; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
-; CHECK-I686-NEXT: movzwl (%esi), %eax
+; CHECK-I686-NEXT: movzwl 6(%esi), %eax
 ; CHECK-I686-NEXT: movl %eax, (%esp)
 ; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: movzwl 6(%esi), %eax
+; CHECK-I686-NEXT: movzwl (%esi), %eax
 ; CHECK-I686-NEXT: movl %eax, (%esp)
 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
 ; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
@@ -453,10 +453,10 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
 ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-I686-NEXT: addl $56, %esp
 ; CHECK-I686-NEXT: popl %esi