diff options
-rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 3 | ||||
-rw-r--r-- | clang/lib/Headers/avxintrin.h | 9 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-shuffle-builtins.c | 34 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 3 |
4 files changed, 40 insertions, 9 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 8faaea381ae..6cd7a79d545 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -453,9 +453,6 @@ BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "")
 BUILTIN(__builtin_ia32_movmskps256, "iV8f", "")
 BUILTIN(__builtin_ia32_vzeroall, "v", "")
 BUILTIN(__builtin_ia32_vzeroupper, "v", "")
-BUILTIN(__builtin_ia32_vbroadcastss, "V4ffC*", "")
-BUILTIN(__builtin_ia32_vbroadcastsd256, "V4ddC*", "")
-BUILTIN(__builtin_ia32_vbroadcastss256, "V8ffC*", "")
 BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "")
 BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "")
 BUILTIN(__builtin_ia32_storeupd256, "vd*V4d", "")
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 3d50439d36a..4e1044af561 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -737,19 +737,22 @@ _mm256_zeroupper(void)
 static __inline __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_broadcast_ss(float const *__a)
 {
-  return (__m128)__builtin_ia32_vbroadcastss(__a);
+  float __f = *__a;
+  return (__m128)(__v4sf){ __f, __f, __f, __f };
 }
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_broadcast_sd(double const *__a)
 {
-  return (__m256d)__builtin_ia32_vbroadcastsd256(__a);
+  double __d = *__a;
+  return (__m256d)(__v4df){ __d, __d, __d, __d };
 }
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_broadcast_ss(float const *__a)
 {
-  return (__m256)__builtin_ia32_vbroadcastss256(__a);
+  float __f = *__a;
+  return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
 }
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index d071f825aa9..76e2395fe8e 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -63,3 +63,37 @@ __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
   // CHECK: @llvm.x86.avx.vperm2f128.si.256
   return _mm256_permute2f128_si256(a, b, 0x20);
 }
+
+__m128
+test_mm_broadcast_ss(float const *__a) {
+  // CHECK-LABEL: @test_mm_broadcast_ss
+  // CHECK: insertelement <4 x float> {{.*}}, i32 0
+  // CHECK: insertelement <4 x float> {{.*}}, i32 1
+  // CHECK: insertelement <4 x float> {{.*}}, i32 2
+  // CHECK: insertelement <4 x float> {{.*}}, i32 3
+  return _mm_broadcast_ss(__a);
+}
+
+__m256d
+test_mm256_broadcast_sd(double const *__a) {
+  // CHECK-LABEL: @test_mm256_broadcast_sd
+  // CHECK: insertelement <4 x double> {{.*}}, i32 0
+  // CHECK: insertelement <4 x double> {{.*}}, i32 1
+  // CHECK: insertelement <4 x double> {{.*}}, i32 2
+  // CHECK: insertelement <4 x double> {{.*}}, i32 3
+  return _mm256_broadcast_sd(__a);
+}
+
+__m256
+test_mm256_broadcast_ss(float const *__a) {
+  // CHECK-LABEL: @test_mm256_broadcast_ss
+  // CHECK: insertelement <8 x float> {{.*}}, i32 0
+  // CHECK: insertelement <8 x float> {{.*}}, i32 1
+  // CHECK: insertelement <8 x float> {{.*}}, i32 2
+  // CHECK: insertelement <8 x float> {{.*}}, i32 3
+  // CHECK: insertelement <8 x float> {{.*}}, i32 4
+  // CHECK: insertelement <8 x float> {{.*}}, i32 5
+  // CHECK: insertelement <8 x float> {{.*}}, i32 6
+  // CHECK: insertelement <8 x float> {{.*}}, i32 7
+  return _mm256_broadcast_ss(__a);
+}
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 6df005d8604..8443574c528 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -451,9 +451,6 @@ void f0() {
   tmp_i = __builtin_ia32_movmskps256(tmp_V8f);
   __builtin_ia32_vzeroall();
   __builtin_ia32_vzeroupper();
-  tmp_V4f = __builtin_ia32_vbroadcastss(tmp_fCp);
-  tmp_V4d = __builtin_ia32_vbroadcastsd256(tmp_dCp);
-  tmp_V8f = __builtin_ia32_vbroadcastss256(tmp_fCp);
   tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp);
   tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp);
   __builtin_ia32_storeupd256(tmp_dp, tmp_V4d);