[X86, AVX] Replace vinsertf128 intrinsics with generic shuffles.

We want to replace as much custom x86 shuffling via intrinsics as possible because pushing the code down the generic shuffle optimization path allows for better codegen and less complexity in LLVM. This is the sibling patch for the LLVM half of this change: http://reviews.llvm.org/D8086 Differential Revision: http://reviews.llvm.org/D8088 llvm-svn: 231792
author: Sanjay Patel <spatel@rotateright.com> 2015-03-10 15:19:26 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-03-10 15:19:26 +0000
commit: 7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad (patch)
tree: 33d4fe29bb145838dd074af702f8e321ad9fc15a /clang/test/CodeGen
parent: 4683395808cf9f4eca70275dd626334994f35330 (diff)
download: bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.tar.gz
bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.zip
2 files changed, 39 insertions, 3 deletions
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index 76e2395fe8e..3273b1ea2f8 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -97,3 +97,42 @@ test_mm256_broadcast_ss(float const *__a) {
   // CHECK: insertelement <8 x float> {{.*}}, i32 7
   return _mm256_broadcast_ss(__a);
 }
+
+// Make sure we have the correct mask for each insertf128 case.
+
+__m256d test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_0
+  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_pd(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_si256(a, b, 1);
+}
+
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index c77be2cd7e5..811bef28816 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -419,9 +419,6 @@ void f0() {
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
-  tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x1);
-  tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x1);
-  tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x1);
   tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
   tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
   tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);
author	Sanjay Patel <spatel@rotateright.com>	2015-03-10 15:19:26 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2015-03-10 15:19:26 +0000
commit	7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad (patch)
tree	33d4fe29bb145838dd074af702f8e321ad9fc15a /clang/test/CodeGen
parent	4683395808cf9f4eca70275dd626334994f35330 (diff)
download	bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.tar.gz bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.zip