diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2015-03-10 15:19:26 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2015-03-10 15:19:26 +0000 |
| commit | 7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad (patch) | |
| tree | 33d4fe29bb145838dd074af702f8e321ad9fc15a /clang/test/CodeGen | |
| parent | 4683395808cf9f4eca70275dd626334994f35330 (diff) | |
| download | bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.tar.gz bcm5719-llvm-7f6aa52e93eeff51a4a97921a5e7a2c6d27535ad.zip | |
[X86, AVX] Replace vinsertf128 intrinsics with generic shuffles.
We want to replace as much custom x86 shuffling via intrinsics
as possible because pushing the code down the generic shuffle
optimization path allows for better codegen and less complexity
in LLVM.
This is the sibling patch for the LLVM half of this change:
http://reviews.llvm.org/D8086
Differential Revision: http://reviews.llvm.org/D8088
llvm-svn: 231792
Diffstat (limited to 'clang/test/CodeGen')
| -rw-r--r-- | clang/test/CodeGen/avx-shuffle-builtins.c | 39 | ||||
| -rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 3 |
2 files changed, 39 insertions, 3 deletions
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index 76e2395fe8e..3273b1ea2f8 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -97,3 +97,42 @@ test_mm256_broadcast_ss(float const *__a) {
   // CHECK: insertelement <8 x float> {{.*}}, i32 7
   return _mm256_broadcast_ss(__a);
 }
+
+// Make sure we have the correct mask for each insertf128 case.
+
+__m256d test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_0
+  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_pd(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_si256(a, b, 1);
+}
+
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index c77be2cd7e5..811bef28816 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -419,9 +419,6 @@ void f0() {
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
-  tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x1);
-  tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x1);
-  tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x1);
   tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
   tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
   tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);

