summary | refs | log | tree | commit | diff | stats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r-- clang/include/clang/Basic/BuiltinsX86.def | 3
-rw-r--r-- clang/lib/Headers/avxintrin.h | 9
-rw-r--r-- clang/test/CodeGen/avx-shuffle-builtins.c | 34
-rw-r--r-- clang/test/CodeGen/builtins-x86.c | 3
4 files changed, 40 insertions, 9 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 8faaea381ae..6cd7a79d545 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -453,9 +453,6 @@ BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "")
BUILTIN(__builtin_ia32_movmskps256, "iV8f", "")
BUILTIN(__builtin_ia32_vzeroall, "v", "")
BUILTIN(__builtin_ia32_vzeroupper, "v", "")
-BUILTIN(__builtin_ia32_vbroadcastss, "V4ffC*", "")
-BUILTIN(__builtin_ia32_vbroadcastsd256, "V4ddC*", "")
-BUILTIN(__builtin_ia32_vbroadcastss256, "V8ffC*", "")
BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "")
BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "")
BUILTIN(__builtin_ia32_storeupd256, "vd*V4d", "")
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 3d50439d36a..4e1044af561 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -737,19 +737,22 @@ _mm256_zeroupper(void)
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_broadcast_ss(float const *__a)
{
- return (__m128)__builtin_ia32_vbroadcastss(__a);
+ float __f = *__a;
+ return (__m128)(__v4sf){ __f, __f, __f, __f };
}
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_broadcast_sd(double const *__a)
{
- return (__m256d)__builtin_ia32_vbroadcastsd256(__a);
+ double __d = *__a;
+ return (__m256d)(__v4df){ __d, __d, __d, __d };
}
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_broadcast_ss(float const *__a)
{
- return (__m256)__builtin_ia32_vbroadcastss256(__a);
+ float __f = *__a;
+ return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
}
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index d071f825aa9..76e2395fe8e 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -63,3 +63,37 @@ __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
// CHECK: @llvm.x86.avx.vperm2f128.si.256
return _mm256_permute2f128_si256(a, b, 0x20);
}
+
+__m128
+test_mm_broadcast_ss(float const *__a) {
+ // CHECK-LABEL: @test_mm_broadcast_ss
+ // CHECK: insertelement <4 x float> {{.*}}, i32 0
+ // CHECK: insertelement <4 x float> {{.*}}, i32 1
+ // CHECK: insertelement <4 x float> {{.*}}, i32 2
+ // CHECK: insertelement <4 x float> {{.*}}, i32 3
+ return _mm_broadcast_ss(__a);
+}
+
+__m256d
+test_mm256_broadcast_sd(double const *__a) {
+ // CHECK-LABEL: @test_mm256_broadcast_sd
+ // CHECK: insertelement <4 x double> {{.*}}, i32 0
+ // CHECK: insertelement <4 x double> {{.*}}, i32 1
+ // CHECK: insertelement <4 x double> {{.*}}, i32 2
+ // CHECK: insertelement <4 x double> {{.*}}, i32 3
+ return _mm256_broadcast_sd(__a);
+}
+
+__m256
+test_mm256_broadcast_ss(float const *__a) {
+ // CHECK-LABEL: @test_mm256_broadcast_ss
+ // CHECK: insertelement <8 x float> {{.*}}, i32 0
+ // CHECK: insertelement <8 x float> {{.*}}, i32 1
+ // CHECK: insertelement <8 x float> {{.*}}, i32 2
+ // CHECK: insertelement <8 x float> {{.*}}, i32 3
+ // CHECK: insertelement <8 x float> {{.*}}, i32 4
+ // CHECK: insertelement <8 x float> {{.*}}, i32 5
+ // CHECK: insertelement <8 x float> {{.*}}, i32 6
+ // CHECK: insertelement <8 x float> {{.*}}, i32 7
+ return _mm256_broadcast_ss(__a);
+}
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 6df005d8604..8443574c528 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -451,9 +451,6 @@ void f0() {
tmp_i = __builtin_ia32_movmskps256(tmp_V8f);
__builtin_ia32_vzeroall();
__builtin_ia32_vzeroupper();
- tmp_V4f = __builtin_ia32_vbroadcastss(tmp_fCp);
- tmp_V4d = __builtin_ia32_vbroadcastsd256(tmp_dCp);
- tmp_V8f = __builtin_ia32_vbroadcastss256(tmp_fCp);
tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp);
tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp);
__builtin_ia32_storeupd256(tmp_dp, tmp_V4d);
OpenPOWER on IntegriCloud