From 2a9cde026c3164ec3d4419d1c2da30ffdc70233d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 21 Jun 2018 11:37:13 +0000 Subject: [X86][AVX] Reduce v4f64/v4i64 shuffle costs (PR37882) These were being over cautious for costs for one/two op general shuffles - VSHUFPD doesn't have to replicate the same shuffle in both lanes like VSHUFPS does. llvm-svn: 335216 --- llvm/test/Transforms/SLPVectorizer/X86/hsub.ll | 40 +++++++------------------- 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'llvm/test/Transforms/SLPVectorizer/X86/hsub.ll') diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll index e936dc29d45..d6e44aa1d6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -211,21 +211,11 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> ; SLM-NEXT: ret <4 x double> [[R03]] ; -; AVX1-LABEL: @test_v4f64( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; AVX1-NEXT: ret <4 x double> [[R03]] -; -; AVX2-LABEL: @test_v4f64( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] -; AVX2-NEXT: ret <4 x double> [[TMP3]] +; AVX-LABEL: @test_v4f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: ret <4 x double> [[TMP3]] ; ; AVX512-LABEL: @test_v4f64( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -341,21 +331,11 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> ; SLM-NEXT: ret <4 x i64> [[R03]] ; -; AVX1-LABEL: @test_v4i64( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]] -; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> -; AVX1-NEXT: ret <4 x i64> [[R03]] -; -; AVX2-LABEL: @test_v4i64( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]] -; AVX2-NEXT: ret <4 x i64> [[TMP3]] +; AVX-LABEL: @test_v4i64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> +; AVX-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]] +; AVX-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX512-LABEL: @test_v4i64( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> -- cgit v1.2.3