diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-23 10:35:24 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-23 10:35:24 +0000 |
| commit | 9eb978b47b57949eed0acbaf55297eea1c7ac350 (patch) | |
| tree | ad23c97e09d2558232e0d75fd90eb07499d6a581 | |
| parent | 9e4ecfaec7b7a0821c5083125feba4b94b5e96d5 (diff) | |
| download | bcm5719-llvm-9eb978b47b57949eed0acbaf55297eea1c7ac350.tar.gz bcm5719-llvm-9eb978b47b57949eed0acbaf55297eea1c7ac350.zip | |
[X86][SSE] Demonstrate inability to recognise that (v)cvtpd2ps implicitly zeroes the upper half of the xmm
llvm-svn: 279508
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_fptrunc.ll | 61 |
1 file changed, 61 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vec_fptrunc.ll b/llvm/test/CodeGen/X86/vec_fptrunc.ll
index 343119382e7..a13e1471392 100644
--- a/llvm/test/CodeGen/X86/vec_fptrunc.ll
+++ b/llvm/test/CodeGen/X86/vec_fptrunc.ll
@@ -133,6 +133,67 @@ entry:
   ret void
 }
+define <4 x float> @fptrunc_frommem2_zext(<2 x double> * %ld) {
+; X32-SSE-LABEL: fptrunc_frommem2_zext:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: cvtpd2ps (%eax), %xmm0
+; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_frommem2_zext:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: vcvtpd2psx (%eax), %xmm0
+; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_frommem2_zext:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm0
+; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_frommem2_zext:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vcvtpd2psx (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-AVX-NEXT: retq
+  %arg = load <2 x double>, <2 x double> * %ld, align 16
+  %cvt = fptrunc <2 x double> %arg to <2 x float>
+  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x float> %ret
+}
+
+define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
+; X32-SSE-LABEL: fptrunc_fromreg2_zext:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: cvtpd2ps %xmm0, %xmm0
+; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_fromreg2_zext:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_fromreg2_zext:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: cvtpd2ps %xmm0, %xmm0
+; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_fromreg2_zext:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-AVX-NEXT: retq
+  %cvt = fptrunc <2 x double> %arg to <2 x float>
+  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x float> %ret
+}
+
 ; FIXME: For exact truncations we should be able to fold this.
 define <4 x float> @fptrunc_fromconst() {
 ; X32-SSE-LABEL: fptrunc_fromconst:

