summary | refs | log | tree | commit | diff | stats
path: root/clang/test/CodeGen
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2015-03-10 15:19:26 +0000
committer Sanjay Patel <spatel@rotateright.com> 2015-03-10 15:19:26 +0000
commit 7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad (patch)
tree 33d4fe29bb145838dd074af702f8e321ad9fc15a /clang/test/CodeGen
parent 4683395808cf9f4eca70275dd626334994f35330 (diff)
download bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.tar.gz
bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.zip
[X86, AVX] Replace vinsertf128 intrinsics with generic shuffles.
We want to replace as much custom x86 shuffling via intrinsics as possible because pushing the code down the generic shuffle optimization path allows for better codegen and less complexity in LLVM.

This is the sibling patch for the LLVM half of this change: http://reviews.llvm.org/D8086

Differential Revision: http://reviews.llvm.org/D8088

llvm-svn: 231792
Diffstat (limited to 'clang/test/CodeGen')
-rw-r--r-- clang/test/CodeGen/avx-shuffle-builtins.c 39
-rw-r--r-- clang/test/CodeGen/builtins-x86.c 3
2 files changed, 39 insertions, 3 deletions
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index 76e2395fe8e..3273b1ea2f8 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -97,3 +97,42 @@ test_mm256_broadcast_ss(float const *__a) {
// CHECK: insertelement <8 x float> {{.*}}, i32 7
return _mm256_broadcast_ss(__a);
}
+
+// Make sure we have the correct mask for each insertf128 case.
+
+__m256d test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+ // CHECK-LABEL: @test_mm256_insertf128_ps_0
+ // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+ return _mm256_insertf128_ps(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+ // CHECK-LABEL: @test_mm256_insertf128_pd_0
+ // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+ return _mm256_insertf128_pd(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+ // CHECK-LABEL: @test_mm256_insertf128_si256_0
+ // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+ return _mm256_insertf128_si256(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+ // CHECK-LABEL: @test_mm256_insertf128_ps_1
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ return _mm256_insertf128_ps(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
+ // CHECK-LABEL: @test_mm256_insertf128_pd_1
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+ return _mm256_insertf128_pd(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+ // CHECK-LABEL: @test_mm256_insertf128_si256_1
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+ return _mm256_insertf128_si256(a, b, 1);
+}
+
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index c77be2cd7e5..811bef28816 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -419,9 +419,6 @@ void f0() {
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
- tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x1);
- tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x1);
- tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x1);
tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);
OpenPOWER on IntegriCloud