From b96bca81e74b925e5cf50a75e88558b6b49a65b0 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Thu, 8 Dec 2011 22:30:45 +0000
Subject: Add 256-bit variant vmovss and vmovsd patterns. rdar://10538417

llvm-svn: 146196
---
Review notes (free text after the '---' marker; not part of the commit message):
 - The 256-bit patterns in the VMOVSS section select VMOVSSrr on the sub_ss
   subregisters of v8i32/v8f32 operands, so they match the X86Movss node.
   The 256-bit patterns in the VMOVSD section select VMOVSDrr on sub_sd of
   v4i64/v4f64 operands and match X86Movsd.
 - Each 256-bit pattern wraps the 128-bit instruction result with
   SUBREG_TO_REG (i32 0) ... sub_xmm to produce the VR256 value.
 - NOTE(review): both shufflevector lines in test2 end at "<3 x i32>" with no
   mask constant; the mask operands look to have been lost from this copy of
   the patch. Restore them from the upstream commit before applying -- TODO
   confirm against llvm-svn 146196.

 llvm/lib/Target/X86/X86InstrSSE.td   | 21 +++++++++++++++++++++
 llvm/test/CodeGen/X86/avx-shuffle.ll | 10 ++++++++++
 2 files changed, 31 insertions(+)

(limited to 'llvm')

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7b19d22f8c4..e806b8f7235 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -645,6 +645,16 @@ let Predicates = [HasAVX] in {
             (VMOVSSrr (v4f32 VR128:$src1),
                       (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
 
+  // 256-bit variants
+  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+                        (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+                        (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
   // Shuffle with VMOVSD
   def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
             (VMOVSDrr VR128:$src1, FR64:$src2)>;
@@ -661,6 +671,17 @@ let Predicates = [HasAVX] in {
             (VMOVSDrr VR128:$src1,
                       (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
 
+  // 256-bit variants
+  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+                        (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+                        (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
   // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
   // is during lowering, where it's not possible to recognize the fold cause
   // it has two uses through a bitcast. One use disappears at isel time and the
diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll
index 0db334dd994..e66a3d48d38 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle.ll
@@ -8,3 +8,13 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
 ; CHECK: vshufps
 ; CHECK: vpshufd
 }
+
+; rdar://10538417
+define <3 x i64> @test2(<3 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vxorps
+; CHECK: vmovsd
+  %1 = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32>
+  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32>
+  ret <3 x i64> %2
+}
-- 
cgit v1.2.3