author     Craig Topper <craig.topper@intel.com>   2018-09-29 17:49:42 +0000
committer  Craig Topper <craig.topper@intel.com>   2018-09-29 17:49:42 +0000
commit     716e8e6858222a1783bd62e10e7f26491cd2cd4f (patch)
tree       902e406e7bcee679dc124c340185d5fee23988b3
parent     a93407fadfe601f28e0a2df5e7946195f4f414c4 (diff)
download   bcm5719-llvm-716e8e6858222a1783bd62e10e7f26491cd2cd4f.tar.gz
           bcm5719-llvm-716e8e6858222a1783bd62e10e7f26491cd2cd4f.zip
[X86] Add more of the icc unaligned load/store to/from 128 bit vector intrinsics
Summary:
This patch adds _mm_loadu_si32, _mm_loadu_si16, _mm_storeu_si64,
_mm_storeu_si32, and _mm_storeu_si16. We already had _mm_loadu_si64.

Reviewers: spatel, RKSimon

Reviewed By: RKSimon

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D52665

llvm-svn: 343388
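For orientation (not part of the commit), a minimal usage sketch of the new
intrinsics, assuming a compiler whose emmintrin.h already carries this patch;
the buffer offsets and the helper name are illustrative:

#include <emmintrin.h>

/* Copy a 16-bit, a 32-bit and a 64-bit field between arbitrarily aligned
   byte buffers; none of the accesses assume any alignment. */
static void copy_fields(const unsigned char *src, unsigned char *dst)
{
    __m128i w = _mm_loadu_si16(src + 1);   /* low 16 bits loaded, rest zeroed */
    __m128i d = _mm_loadu_si32(src + 3);   /* low 32 bits loaded, rest zeroed */
    __m128i q = _mm_loadu_si64(src + 7);   /* pre-existing 64-bit variant */

    _mm_storeu_si16(dst + 1, w);           /* writes only the low 16 bits */
    _mm_storeu_si32(dst + 3, d);           /* writes only the low 32 bits */
    _mm_storeu_si64(dst + 7, q);           /* writes only the low 64 bits */
}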
-rw-r--r--   clang/lib/Headers/emmintrin.h        107
-rw-r--r--   clang/test/CodeGen/sse2-builtins.c    48
2 files changed, 154 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index f0ea7cd05c6..6d61f971994 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1675,7 +1675,49 @@ _mm_loadu_si64(void const *__a)
long long __v;
} __attribute__((__packed__, __may_alias__));
long long __u = ((struct __loadu_si64*)__a)->__v;
- return __extension__ (__m128i)(__v2di){__u, 0L};
+ return __extension__ (__m128i)(__v2di){__u, 0LL};
+}
+
+/// Loads a 32-bit integer value to the low element of a 128-bit integer
+/// vector and clears the upper elements.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
+///
+/// \param __a
+/// A pointer to a 32-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_loadu_si32(void const *__a)
+{
+ struct __loadu_si32 {
+ int __v;
+ } __attribute__((__packed__, __may_alias__));
+ int __u = ((struct __loadu_si32*)__a)->__v;
+ return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
+}
+
+/// Loads a 16-bit integer value to the low element of a 128-bit integer
+/// vector and clears the upper elements.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __a
+/// A pointer to a 16-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_loadu_si16(void const *__a)
+{
+ struct __loadu_si16 {
+ short __v;
+ } __attribute__((__packed__, __may_alias__));
+ short __u = ((struct __loadu_si16*)__a)->__v;
+ return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
}
/// Loads a 64-bit double-precision value to the low element of a
@@ -3993,6 +4035,69 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
((struct __storeu_si128*)__p)->__v = __b;
}
+/// Stores a 64-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
+///
+/// \param __p
+/// A pointer to a 64-bit memory location. The address of the memory
+///   location does not have to be aligned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si64(void const *__p, __m128i __b)
+{
+ struct __storeu_si64 {
+ long long __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
+}
+
+/// Stores a 32-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
+///
+/// \param __p
+/// A pointer to a 32-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si32(void const *__p, __m128i __b)
+{
+ struct __storeu_si32 {
+ int __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
+}
+
+/// Stores a 16-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __p
+/// A pointer to a 16-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si16(void const *__p, __m128i __b)
+{
+ struct __storeu_si16 {
+ short __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
+}
+
/// Moves bytes selected by the mask from the first operand to the
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.
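The new helpers reuse the idiom already used by _mm_loadu_si64 and
_mm_storeu_si128 above: the unaligned, type-punned access goes through a local
struct marked __packed__ (so its member has alignment 1) and __may_alias__ (so
the access is exempt from strict-aliasing analysis). A standalone sketch of
that idiom with a hypothetical helper name, not part of the header:

/* Read a 32-bit value from a possibly unaligned address.  The packed struct
   forces an align-1 load; may_alias keeps the pointer cast legal under the
   strict-aliasing rules. */
static inline int load_unaligned_i32(const void *p)
{
    struct wrapper {
        int v;
    } __attribute__((__packed__, __may_alias__));
    return ((const struct wrapper *)p)->v;
}

A plain memcpy into a local variable is the portable equivalent and typically
lowers to the same align-1 load that the tests below check for.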
diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c
index ac22f5b1c85..005bdfd9174 100644
--- a/clang/test/CodeGen/sse2-builtins.c
+++ b/clang/test/CodeGen/sse2-builtins.c
@@ -721,6 +721,30 @@ __m128i test_mm_loadu_si64(void const* A) {
return _mm_loadu_si64(A);
}
+__m128i test_mm_loadu_si32(void const* A) {
+ // CHECK-LABEL: test_mm_loadu_si32
+ // CHECK: load i32, i32* %{{.*}}, align 1{{$}}
+ // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
+ return _mm_loadu_si32(A);
+}
+
+__m128i test_mm_loadu_si16(void const* A) {
+ // CHECK-LABEL: test_mm_loadu_si16
+ // CHECK: load i16, i16* %{{.*}}, align 1{{$}}
+ // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
+ return _mm_loadu_si16(A);
+}
+
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_madd_epi16
// CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
@@ -1351,6 +1375,30 @@ void test_mm_storeu_si128(__m128i* A, __m128i B) {
_mm_storeu_si128(A, B);
}
+void test_mm_storeu_si64(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si64
+ // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
+ // CHECK: store i64 [[EXT]], i64* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si64(A, B);
+}
+
+void test_mm_storeu_si32(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si32
+ // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
+ // CHECK: store i32 [[EXT]], i32* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si32(A, B);
+}
+
+void test_mm_storeu_si16(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si16
+ // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
+ // CHECK: store i16 [[EXT]], i16* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si16(A, B);
+}
+
void test_mm_stream_pd(double *A, __m128d B) {
// CHECK-LABEL: test_mm_stream_pd
// CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal