summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2016-07-09 05:30:41 +0000
committer Craig Topper <craig.topper@gmail.com> 2016-07-09 05:30:41 +0000
commit 95b61b0544a024661c86215506ce035b1d966ebc (patch)
tree e6386c240777d54eb6e280d12c5e6002159867c0
parent 45a59a08bc712c63505f688da7f015145cc3edbd (diff)
download bcm5719-llvm-95b61b0544a024661c86215506ce035b1d966ebc.tar.gz
bcm5719-llvm-95b61b0544a024661c86215506ce035b1d966ebc.zip
[X86] Use __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi to implement pextrw/pinsertw MMX intrinsics instead of trying to use native IR.
Without this we end up generating code that doesn't use MMX registers and probably doesn't work well with other MMX intrinsics.
llvm-svn: 274968
-rw-r--r-- clang/include/clang/Basic/BuiltinsX86.def 2
-rw-r--r-- clang/lib/Headers/xmmintrin.h 17
-rw-r--r-- clang/test/CodeGen/mmx-builtins.c 12
3 files changed, 18 insertions, 13 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index fc71f90e074..59dee2f0466 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -161,6 +161,8 @@ TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "", "sse")
TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "", "sse")
TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "", "sse")
TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "", "sse")
// MMX+SSE2
TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "", "sse2")
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 27967e0d856..3110e8babf9 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2114,12 +2114,8 @@ _mm_sfence(void)
/// 2: Bits [47:32] are copied to the destination.
/// 3: Bits [63:48] are copied to the destination.
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_pi16(__m64 __a, int __n)
-{
- __v4hi __b = (__v4hi)__a;
- return (unsigned short)__b[__n & 3];
-}
+#define _mm_extract_pi16(a, n) __extension__ ({ \
+ (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); })
/// \brief Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2145,13 +2141,8 @@ _mm_extract_pi16(__m64 __a, int __n)
/// bits in operand __a.
/// \returns A 64-bit integer vector containing the copied packed data from the
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_insert_pi16(__m64 __a, int __d, int __n)
-{
- __v4hi __b = (__v4hi)__a;
- __b[__n & 3] = __d;
- return (__m64)__b;
-}
+#define _mm_insert_pi16(a, d, n) __extension__ ({ \
+ (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); })
/// \brief Compares each of the corresponding packed 16-bit integer values of
/// the 64-bit integer vectors, and writes the greater value to the
diff --git a/clang/test/CodeGen/mmx-builtins.c b/clang/test/CodeGen/mmx-builtins.c
index e2761f4e36c..2bf497d58aa 100644
--- a/clang/test/CodeGen/mmx-builtins.c
+++ b/clang/test/CodeGen/mmx-builtins.c
@@ -217,6 +217,12 @@ __m64 test_mm_cvttps_pi32(__m128 a) {
return _mm_cvttps_pi32(a);
}
+int test_mm_extract_pi16(__m64 a) {
+ // CHECK-LABEL: test_mm_extract_pi16
+ // CHECK: call i32 @llvm.x86.mmx.pextr.w
+ return _mm_extract_pi16(a, 2);
+}
+
__m64 test_m_from_int(int a) {
// CHECK-LABEL: test_m_from_int
// CHECK: insertelement <2 x i32>
@@ -265,6 +271,12 @@ __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
return _mm_hsubs_pi16(a, b);
}
+__m64 test_mm_insert_pi16(__m64 a, int d) {
+ // CHECK-LABEL: test_mm_insert_pi16
+ // CHECK: call x86_mmx @llvm.x86.mmx.pinsr.w
+ return _mm_insert_pi16(a, d, 2);
+}
+
__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_madd_pi16
// CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd
OpenPOWER on IntegriCloud