summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-04-14 15:05:57 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-04-14 15:05:57 +0000
commit9f6e79c5e41c8cc1fac3f1793da0ddd14ee0121b (patch)
tree2f4843138178b0775b3134ffed3aeb00ca332e9c
parent5a22eaa2bf4e72bebe79b70af500c7bce9a6977e (diff)
downloadbcm5719-llvm-9f6e79c5e41c8cc1fac3f1793da0ddd14ee0121b.tar.gz
bcm5719-llvm-9f6e79c5e41c8cc1fac3f1793da0ddd14ee0121b.zip
[X86][SSE] Update MOVNTDQA non-temporal loads to generic implementation (clang)
MOVNTDQA non-temporal aligned vector loads can be correctly represented using generic builtin loads, allowing us to remove the existing x86 intrinsics. LLVM companion patch: D31767. Differential Revision: https://reviews.llvm.org/D31766 llvm-svn: 300326
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def3
-rw-r--r--clang/lib/Headers/avx2intrin.h2
-rw-r--r--clang/lib/Headers/avx512fintrin.h2
-rw-r--r--clang/lib/Headers/smmintrin.h2
-rw-r--r--clang/test/CodeGen/avx2-builtins.c2
-rw-r--r--clang/test/CodeGen/avx512f-builtins.c2
-rw-r--r--clang/test/CodeGen/sse41-builtins.c2
7 files changed, 6 insertions, 9 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e2461758844..c8a3c2f4d3a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -391,7 +391,6 @@ TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_movntdqa, "V2LLiV2LLiC*", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "", "sse4.1")
@@ -576,7 +575,6 @@ TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLiC*", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "", "avx2")
TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "", "avx2")
@@ -1747,7 +1745,6 @@ TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movntdqa512, "V8LLiV8LLi*","","avx512f")
TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64cIiV64cULLi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs","","avx512bw,avx512vl")
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 13bcbef4dbb..5d83a8db484 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -832,7 +832,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_stream_load_si256(__m256i const *__V)
{
- return (__m256i)__builtin_ia32_movntdqa256((const __v4di *)__V);
+ return (__m256i)__builtin_nontemporal_load((const __v4di *)__V);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 707857f57f2..d8535f76588 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8931,7 +8931,7 @@ _mm512_stream_si512 (__m512i * __P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void *__P)
{
- return __builtin_ia32_movntdqa512 ((__v8di *)__P);
+ return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
}
static __inline__ void __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index c73acc60286..dccba4e40b2 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -691,7 +691,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_stream_load_si128 (__m128i const *__V)
{
- return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V);
+ return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);
}
/* SSE4 Packed Integer Min/Max Instructions. */
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c
index 31b02ac14e2..10f3e715de9 100644
--- a/clang/test/CodeGen/avx2-builtins.c
+++ b/clang/test/CodeGen/avx2-builtins.c
@@ -1117,7 +1117,7 @@ __m256i test_mm256_srlv_epi64(__m256i a, __m256i b) {
__m256i test_mm256_stream_load_si256(__m256i const *a) {
// CHECK-LABEL: test_mm256_stream_load_si256
- // CHECK: call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %{{.*}})
+ // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 32, !nontemporal
return _mm256_stream_load_si256(a);
}
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index b25df327fbe..3ae80141b3b 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -6251,7 +6251,7 @@ void test_mm512_stream_si512(__m512i * __P, __m512i __A) {
__m512i test_mm512_stream_load_si512(void *__P) {
// CHECK-LABEL: @test_mm512_stream_load_si512
- // CHECK: @llvm.x86.avx512.movntdqa
+ // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
return _mm512_stream_load_si512(__P);
}
diff --git a/clang/test/CodeGen/sse41-builtins.c b/clang/test/CodeGen/sse41-builtins.c
index adf9609b68f..b48b73ec18d 100644
--- a/clang/test/CodeGen/sse41-builtins.c
+++ b/clang/test/CodeGen/sse41-builtins.c
@@ -354,7 +354,7 @@ __m128 test_mm_round_ss(__m128 x, __m128 y) {
__m128i test_mm_stream_load_si128(__m128i const *a) {
// CHECK-LABEL: test_mm_stream_load_si128
- // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %{{.*}})
+ // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16, !nontemporal
return _mm_stream_load_si128(a);
}
OpenPOWER on IntegriCloud