summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2015-10-01 23:40:12 +0000
committerChandler Carruth <chandlerc@gmail.com>2015-10-01 23:40:12 +0000
commitcbe6411401e21d76d6d80997f62583c98822c518 (patch)
treedd432010b54d47424bba6a45b996c8208a2c2d16 /clang/lib
parent95e82d5b482e71286168eb0cab709d627ea9a06f (diff)
downloadbcm5719-llvm-cbe6411401e21d76d6d80997f62583c98822c518.tar.gz
bcm5719-llvm-cbe6411401e21d76d6d80997f62583c98822c518.zip
Fix the SSE4 byte sign extension in a cleaner way, and more thoroughly
test that our intrinsics behave the same under -fsigned-char and -funsigned-char. This further testing uncovered that AVX-2 has a broken cmpgt for 8-bit elements, and has for a long time. This is fixed in the same way as SSE4 handles the case. The other ISA extensions currently work correctly because they use specific instruction intrinsics. As soon as they are rewritten in terms of generic IR, they will need to add these special casts. I've added the necessary testing to catch this however, so we shouldn't have to chase it down again. I considered changing the core typedef to be signed, but that seems like a bad idea. Notably, it would be an ABI break if anyone is reaching into the innards of the intrinsic headers and passing __v16qi on an API boundary. I can't be completely confident that this wouldn't happen due to a macro expanding in a lambda, etc., so it seems much better to leave it alone. It also matches GCC's behavior exactly. A fun side note is that for both GCC and Clang, -funsigned-char really does change the semantics of __v16qi. To observe this, consider: % cat x.cc #include <smmintrin.h> #include <iostream> int main() { __v16qi a = { 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; __v16qi b = _mm_set1_epi8(-1); std::cout << (int)(a / b)[0] << ", " << (int)(a / b)[1] << '\n'; } % clang++ -o x x.cc && ./x -1, 1 % clang++ -funsigned-char -o x x.cc && ./x 0, 1 However, while this may be surprising, both Clang and GCC agree. Differential Revision: http://reviews.llvm.org/D13324 llvm-svn: 249097
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Headers/avx2intrin.h4
-rw-r--r--clang/lib/Headers/avxintrin.h4
-rw-r--r--clang/lib/Headers/emmintrin.h7
-rw-r--r--clang/lib/Headers/smmintrin.h28
4 files changed, 22 insertions, 21 deletions
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index a5c376cca62..90b8530126d 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -208,7 +208,9 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a > (__v32qi)__b);
+ /* This function always performs a signed comparison, but __v32qi is a char
+ which may be signed or unsigned, so use __v32qs. */
+ return (__m256i)((__v32qs)__a > (__v32qs)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a8418ad0ed6..7e4de9d443c 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -35,6 +35,10 @@ typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v32qs __attribute__((__vector_size__(32)));
+
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef double __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 761aefafa18..47eaf091e1e 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -35,6 +35,10 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v16qs __attribute__((__vector_size__(16)));
+
#include <f16cintrin.h>
/* Define the default attributes for the functions in this file. */
@@ -996,8 +1000,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi8(__m128i __a, __m128i __b)
{
/* This function always performs a signed comparison, but __v16qi is a char
- which may be signed or unsigned. */
- typedef signed char __v16qs __attribute__((__vector_size__(16)));
+ which may be signed or unsigned, so use __v16qs. */
return (__m128i)((__v16qs)__a > (__v16qs)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 7a18201eced..90ba9970cdb 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -286,34 +286,26 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi16(__m128i __V)
{
- /* We need a local definitively signed typedef similar to __v16qi even in the
- * presence of __UNSIGNED_CHAR__.
- * FIXME: __v16qi should almost certainly be definitively signed.
- */
- typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
- return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi32(__m128i __V)
{
- /* We need a local definitively signed typedef similar to __v16qi even in the
- * presence of __UNSIGNED_CHAR__.
- * FIXME: __v16qi should almost certainly be definitively signed.
- */
- typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
- return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1, 2, 3), __v4si);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi64(__m128i __V)
{
- /* We need a local definitively signed typedef similar to __v16qi even in the
- * presence of __UNSIGNED_CHAR__.
- * FIXME: __v16qi should almost certainly be definitively signed.
- */
- typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
- return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1), __v2di);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ typedef signed char __v16qs __attribute__((__vector_size__(16)));
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
OpenPOWER on IntegriCloud