diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-06-21 11:37:13 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-06-21 11:37:13 +0000 |
| commit | 2a9cde026c3164ec3d4419d1c2da30ffdc70233d (patch) | |
| tree | 6b0cd8803954eec113deb96ba1168b46a1dcd6f1 /llvm/test/Transforms/SLPVectorizer/X86/hsub.ll | |
| parent | d08fbf64863b2c574bf190e005c211edd73817f3 (diff) | |
| download | bcm5719-llvm-2a9cde026c3164ec3d4419d1c2da30ffdc70233d.tar.gz bcm5719-llvm-2a9cde026c3164ec3d4419d1c2da30ffdc70233d.zip | |
[X86][AVX] Reduce v4f64/v4i64 shuffle costs (PR37882)
These were being over cautious for costs for one/two op general shuffles - VSHUFPD doesn't have to replicate the same shuffle in both lanes like VSHUFPS does.
llvm-svn: 335216
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/hsub.ll')
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/hsub.ll | 40 |
1 files changed, 10 insertions, 30 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll index e936dc29d45..d6e44aa1d6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -211,21 +211,11 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; SLM-NEXT: ret <4 x double> [[R03]] ; -; AVX1-LABEL: @test_v4f64( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5> -; AVX1-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6> -; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7> -; AVX1-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; AVX1-NEXT: ret <4 x double> [[R03]] -; -; AVX2-LABEL: @test_v4f64( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -; AVX2-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] -; AVX2-NEXT: ret <4 x double> [[TMP3]] +; AVX-LABEL: @test_v4f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: ret <4 x double> [[TMP3]] ; ; AVX512-LABEL: @test_v4f64( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -341,21 +331,11 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; SLM-NEXT: ret <4 x i64> [[R03]] ; -; AVX1-LABEL: @test_v4i64( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5> -; AVX1-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6> -; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7> -; AVX1-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]] -; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; AVX1-NEXT: ret <4 x i64> [[R03]] -; -; AVX2-LABEL: @test_v4i64( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -; AVX2-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]] -; AVX2-NEXT: ret <4 x i64> [[TMP3]] +; AVX-LABEL: @test_v4i64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +; AVX-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]] +; AVX-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX512-LABEL: @test_v4i64( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> |

