summary | refs | log | tree | commit | diff | stats
path: root/clang/test/CodeGen/sse2-builtins.c
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-05-30 17:55:25 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-05-30 17:55:25 +0000
commit: 645e1ad33a228162af5a14f971600bd2e5513b11 (patch)
tree: 1afb7391eb7e74ccfdecd0e7e511808caffa4d09 /clang/test/CodeGen/sse2-builtins.c
parent: 720f8da33aea7d3bcbf3b64ff251a45f3686ff40 (diff)
download: bcm5719-llvm-645e1ad33a228162af5a14f971600bd2e5513b11.tar.gz
download: bcm5719-llvm-645e1ad33a228162af5a14f971600bd2e5513b11.zip
[X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
According to the GCC headers, the Intel intrinsics docs and MSDN codegen, _mm_store1_pd (and its _mm_store_pd1 equivalent) should use an aligned pointer - the clang headers are the only implementation I can find that assumes non-aligned stores (by storing with _mm_storeu_pd). Additionally, according to the Intel intrinsics docs and MSDN codegen, _mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer. This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). As a followup I'll update the LLVM fast-isel tests to match this codegen. Differential Revision: http://reviews.llvm.org/D20617 llvm-svn: 271218
Diffstat (limited to 'clang/test/CodeGen/sse2-builtins.c')
-rw-r--r-- clang/test/CodeGen/sse2-builtins.c 12
1 files changed, 9 insertions, 3 deletions
diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c
index 467b4f12f8a..de78d707c02 100644
--- a/clang/test/CodeGen/sse2-builtins.c
+++ b/clang/test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d B) {
_mm_store_pd(A, B);
}
+void test_mm_store_pd1(double* x, __m128d y) {
+ // CHECK-LABEL: test_mm_store_pd1
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+ // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+ _mm_store_pd1(x, y);
+}
+
void test_mm_store_sd(double* A, __m128d B) {
// CHECK-LABEL: test_mm_store_sd
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m128i B) {
void test_mm_store1_pd(double* x, __m128d y) {
// CHECK-LABEL: test_mm_store1_pd
- // CHECK: extractelement <2 x double> %{{.*}}, i32 0
- // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
- // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+ // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
_mm_store1_pd(x, y);
}
OpenPOWER on IntegriCloud