summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def3
-rw-r--r--clang/lib/Headers/avxintrin.h41
-rw-r--r--clang/lib/Sema/SemaChecking.cpp3
-rw-r--r--clang/test/CodeGen/avx-shuffle-builtins.c46
-rw-r--r--clang/test/CodeGen/builtins-x86.c3
5 files changed, 70 insertions, 26 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7cb50b9c035..5eb0b84f342 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -436,9 +436,6 @@ BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "")
BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "")
BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "")
BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "")
-BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIc", "")
-BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIc", "")
-BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIc", "")
BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "")
BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "")
BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "")
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 2d1735ec19f..f7bda6b360e 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -429,19 +429,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
__m128 __b = (b); \
(__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
-/* Vector extract */
-#define _mm256_extractf128_pd(A, O) __extension__ ({ \
- __m256d __A = (A); \
- (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)__A, (O)); })
-
-#define _mm256_extractf128_ps(A, O) __extension__ ({ \
- __m256 __A = (A); \
- (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)__A, (O)); })
-
-#define _mm256_extractf128_si256(A, O) __extension__ ({ \
- __m256i __A = (A); \
- (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); })
-
static __inline int __attribute__((__always_inline__, __nodebug__))
_mm256_extract_epi32(__m256i __a, const int __imm)
{
@@ -1186,6 +1173,34 @@ _mm256_castsi128_si256(__m128i __a)
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
+/*
+ Vector extract.
+ We use macros rather than inlines because we only want to accept
+ invocations where the immediate M is a constant expression.
+*/
+#define _mm256_extractf128_ps(V, M) __extension__ ({ \
+ (__m128)__builtin_shufflevector( \
+ (__v8sf)(V), \
+ (__v8sf)(V), \
+ (((M) & 1) ? 4 : 0), \
+ (((M) & 1) ? 5 : 1), \
+ (((M) & 1) ? 6 : 2), \
+ (((M) & 1) ? 7 : 3) );})
+
+#define _mm256_extractf128_pd(V, M) __extension__ ({ \
+ (__m128d)__builtin_shufflevector( \
+ (__v4df)(V), \
+ (__v4df)(V), \
+ (((M) & 1) ? 2 : 0), \
+ (((M) & 1) ? 3 : 1) );})
+
+#define _mm256_extractf128_si256(V, M) __extension__ ({ \
+ (__m128i)__builtin_shufflevector( \
+ (__v4di)(V), \
+ (__v4di)(V), \
+ (((M) & 1) ? 2 : 0), \
+ (((M) & 1) ? 3 : 1) );})
+
/* SIMD load ops (unaligned) */
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index d66730373b4..7a4a370adc2 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -882,9 +882,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
default: return false;
case X86::BI_mm_prefetch: i = 1; l = 0; u = 3; break;
- case X86::BI__builtin_ia32_vextractf128_pd256:
- case X86::BI__builtin_ia32_vextractf128_ps256:
- case X86::BI__builtin_ia32_vextractf128_si256:
case X86::BI__builtin_ia32_extract128i256: i = 1, l = 0, u = 1; break;
case X86::BI__builtin_ia32_insert128i256: i = 2, l = 0; u = 1; break;
case X86::BI__builtin_ia32_sha1rnds4: i = 2, l = 0; u = 3; break;
diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c
index 3273b1ea2f8..2800ff89bda 100644
--- a/clang/test/CodeGen/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/avx-shuffle-builtins.c
@@ -100,7 +100,7 @@ test_mm256_broadcast_ss(float const *__a) {
// Make sure we have the correct mask for each insertf128 case.
-__m256d test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
// CHECK-LABEL: @test_mm256_insertf128_ps_0
// CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
return _mm256_insertf128_ps(a, b, 0);
@@ -112,13 +112,13 @@ __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
return _mm256_insertf128_pd(a, b, 0);
}
-__m256d test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
// CHECK-LABEL: @test_mm256_insertf128_si256_0
// CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
return _mm256_insertf128_si256(a, b, 0);
}
-__m256d test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
// CHECK-LABEL: @test_mm256_insertf128_ps_1
// CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_insertf128_ps(a, b, 1);
@@ -130,9 +130,47 @@ __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
return _mm256_insertf128_pd(a, b, 1);
}
-__m256d test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
// CHECK-LABEL: @test_mm256_insertf128_si256_1
// CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
return _mm256_insertf128_si256(a, b, 1);
}
+// Make sure we have the correct mask for each extractf128 case.
+
+__m128 test_mm256_extractf128_ps_0(__m256 a) {
+ // CHECK-LABEL: @test_mm256_extractf128_ps_0
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
+ return _mm256_extractf128_ps(a, 0);
+}
+
+__m128d test_mm256_extractf128_pd_0(__m256d a) {
+ // CHECK-LABEL: @test_mm256_extractf128_pd_0
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+ return _mm256_extractf128_pd(a, 0);
+}
+
+__m128i test_mm256_extractf128_si256_0(__m256i a) {
+ // CHECK-LABEL: @test_mm256_extractf128_si256_0
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+ return _mm256_extractf128_si256(a, 0);
+}
+
+__m128 test_mm256_extractf128_ps_1(__m256 a) {
+ // CHECK-LABEL: @test_mm256_extractf128_ps_1
+ // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
+ return _mm256_extractf128_ps(a, 1);
+}
+
+__m128d test_mm256_extractf128_pd_1(__m256d a) {
+ // CHECK-LABEL: @test_mm256_extractf128_pd_1
+ // CHECK: shufflevector{{.*}}<i32 2, i32 3>
+ return _mm256_extractf128_pd(a, 1);
+}
+
+__m128i test_mm256_extractf128_si256_1(__m256i a) {
+ // CHECK-LABEL: @test_mm256_extractf128_si256_1
+ // CHECK: shufflevector{{.*}}<i32 2, i32 3>
+ return _mm256_extractf128_si256(a, 1);
+}
+
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 811bef28816..8a5b5a272d4 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -405,9 +405,6 @@ void f0() {
tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
- tmp_V2d = __builtin_ia32_vextractf128_pd256(tmp_V4d, 0x1);
- tmp_V4f = __builtin_ia32_vextractf128_ps256(tmp_V8f, 0x1);
- tmp_V4i = __builtin_ia32_vextractf128_si256(tmp_V8i, 0x1);
tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
OpenPOWER on IntegriCloud