summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-06-21 11:37:13 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-06-21 11:37:13 +0000
commit2a9cde026c3164ec3d4419d1c2da30ffdc70233d (patch)
tree6b0cd8803954eec113deb96ba1168b46a1dcd6f1 /llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
parentd08fbf64863b2c574bf190e005c211edd73817f3 (diff)
downloadbcm5719-llvm-2a9cde026c3164ec3d4419d1c2da30ffdc70233d.tar.gz
bcm5719-llvm-2a9cde026c3164ec3d4419d1c2da30ffdc70233d.zip
[X86][AVX] Reduce v4f64/v4i64 shuffle costs (PR37882)
These were being over cautious for costs for one/two op general shuffles - VSHUFPD doesn't have to replicate the same shuffle in both lanes like VSHUFPS does. llvm-svn: 335216
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/hsub.ll')
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/hsub.ll40
1 files changed, 10 insertions, 30 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
index e936dc29d45..d6e44aa1d6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
@@ -211,21 +211,11 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x double> [[R03]]
;
-; AVX1-LABEL: @test_v4f64(
-; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
-; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
-; AVX1-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
-; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
-; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
-; AVX1-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX1-NEXT: ret <4 x double> [[R03]]
-;
-; AVX2-LABEL: @test_v4f64(
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-; AVX2-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
-; AVX2-NEXT: ret <4 x double> [[TMP3]]
+; AVX-LABEL: @test_v4f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; AVX-NEXT: ret <4 x double> [[TMP3]]
;
; AVX512-LABEL: @test_v4f64(
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -341,21 +331,11 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x i64> [[R03]]
;
-; AVX1-LABEL: @test_v4i64(
-; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
-; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
-; AVX1-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
-; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
-; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
-; AVX1-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
-; AVX1-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX1-NEXT: ret <4 x i64> [[R03]]
-;
-; AVX2-LABEL: @test_v4i64(
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-; AVX2-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
-; AVX2-NEXT: ret <4 x i64> [[TMP3]]
+; AVX-LABEL: @test_v4i64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; AVX-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
+; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX512-LABEL: @test_v4i64(
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
OpenPOWER on IntegriCloud