From df4af41b9b734e9fdf0453ab092013199b4ad4e0 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Tue, 25 Dec 2012 12:54:19 +0000
Subject: X86: Custom lower <2 x i64> eq and ne when SSE41 is not available.

pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack.
Small speedup on loop-vectorized viterbi (-march=core2).

llvm-svn: 171063
---
 llvm/test/CodeGen/X86/vec_compare.ll | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'llvm/test/CodeGen/X86/vec_compare.ll')

Reviewer notes (commentary only; this region between '---' and the diff is
not part of the commit message and is ignored when the patch is applied):

  * The hunk appends two FileCheck tests after @test4. Both sign-extend the
    <2 x i1> result of a <2 x i64> icmp back to <2 x i64>:
      - @test5 uses `icmp eq` and expects, in order:
        pcmpeqd, pshufd $-11, pshufd $-96, pand, ret.
      - @test6 uses `icmp ne` and expects the same four instructions followed
        by a second pcmpeqd and a pxor before ret.
  * The pshufd immediates are printed as signed bytes: -11 is 0xF5 and -96 is
    0xA0. With pshufd's two-bits-per-destination-lane encoding these select
    source dwords [1,1,3,3] and [0,0,2,2] respectively.
  * NOTE(review): presumably the pand merges the two shuffles so that a 64-bit
    lane is all-ones only when both of its 32-bit halves compared equal, and
    the trailing pcmpeqd/pxor in @test6 builds an all-ones vector and inverts
    the eq mask -- inferred from the instruction list, confirm against the
    lowering code in the companion X86ISelLowering change.
  * Plain `CHECK:` directives match in order but allow unrelated instructions
    in between, so these tests pin the sequence, not exact adjacency.
  * The RUN line (llc flags / target CPU) is outside this hunk and cannot be
    verified from here.
  * NOTE(review): line breaks below were restored from a whitespace-collapsed
    copy of the patch; the original leading indentation of the .ll lines is
    not recoverable from that copy -- regenerate from the repository before
    attempting to apply.

diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index 367dd27f307..cf86c737c73 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -41,3 +41,29 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
  %D = sext <4 x i1> %C to <4 x i32>
  ret <4 x i32> %D
 }
+
+define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test5:
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pshufd $-96
+; CHECK: pand
+; CHECK: ret
+ %C = icmp eq <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test6:
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pshufd $-96
+; CHECK: pand
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp ne <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
-- 
cgit v1.2.3