From be5e987379bf5914d4f44ae7b5565a16f14030ca Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Wed, 17 Aug 2011 02:29:19 +0000
Subject: Introduce matching patterns for vbroadcast AVX instruction.

The idea is to match splats in the form (splat (scalar_to_vector (load ...)))
whenever the load can be folded. All the logic and instruction emission is
working but because of PR8156, there are no ways to match loads, cause they
can never be folded for splats. Thus, the tests are XFAILed, but I've tested
and exercised all the logic using a relaxed version for checking the foldable
loads, as if the bug was already fixed. This should work out of the box once
PR8156 gets fixed since MayFoldLoad will work as expected.

llvm-svn: 137810
---
 llvm/test/CodeGen/X86/avx-vbroadcast.ll | 105 +++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/avx-vbroadcast.ll

(limited to 'llvm/test/CodeGen')

diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
new file mode 100644
index 00000000000..ffa9710677c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; XFAIL: *
+
+; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll
+
+; Each function below loads one scalar through %ptr and inserts it into the
+; lanes of a vector; the expectation is a vbroadcast that takes the load as its
+; memory operand. Every expectation is anchored on the function's Darwin label
+; (_A ... _G) so a pattern can only match inside the function it describes.
+; The functions are readonly, not readnone, because they dereference %ptr.
+
+;;;; 256-bit versions
+
+; 4 x i64 vector with all four lanes set to a loaded i64.
+; CHECK: _A
+; CHECK: vbroadcastsd (%
+define <4 x i64> @A(i64* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load i64* %ptr, align 8
+  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
+  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
+  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
+  ret <4 x i64> %vecinit6.i
+}
+
+; 8 x i32 vector with lanes 0-3 set to a loaded i32; lanes 4-7 stay undef.
+; CHECK: _B
+; CHECK: vbroadcastss (%
+define <8 x i32> @B(i32* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
+  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
+  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
+  ret <8 x i32> %vecinit6.i
+}
+
+; 4 x double vector with all four lanes set to a loaded double.
+; CHECK: _C
+; CHECK: vbroadcastsd (%
+define <4 x double> @C(double* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load double* %ptr, align 8
+  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
+  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
+  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
+  ret <4 x double> %vecinit6.i
+}
+
+; 8 x float vector with lanes 0-3 set to a loaded float; lanes 4-7 stay undef.
+; CHECK: _D
+; CHECK: vbroadcastss (%
+define <8 x float> @D(float* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load float* %ptr, align 4
+  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
+  ret <8 x float> %vecinit6.i
+}
+
+;;;; 128-bit versions
+
+; 4 x float vector with all four lanes set to a loaded float.
+; CHECK: _E
+; CHECK: vbroadcastss (%
+define <4 x float> @E(float* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load float* %ptr, align 4
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+; 4 x i32 vector with all four lanes set to a loaded i32.
+; CHECK: _F
+; CHECK: vbroadcastss (%
+define <4 x i32> @F(i32* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
+  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
+  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
+  ret <4 x i32> %vecinit6.i
+}
+
+; Unsupported vbroadcasts
+
+; 2 x i64 vector with both lanes set to a loaded i64: no vbroadcastsd from memory may appear.
+; CHECK: _G
+; CHECK-NOT: vbroadcastsd (%
+define <2 x i64> @G(i64* %ptr) nounwind uwtable readonly ssp {
+entry:
+  %q = load i64* %ptr, align 8
+  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
+  ret <2 x i64> %vecinit2.i
+}
--
cgit v1.2.3