diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-28 08:12:45 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-28 08:12:45 +0000 |
commit | 91b77ceaed0401cbfae82a0628ff46a501097a5b (patch) | |
tree | 8d2912868e945e9fc827774aca5bada73deed96d /clang/test/CodeGen | |
parent | 869631f9871d19cd36903ead39e4960f4b003116 (diff) | |
download | bcm5719-llvm-91b77ceaed0401cbfae82a0628ff46a501097a5b.tar.gz bcm5719-llvm-91b77ceaed0401cbfae82a0628ff46a501097a5b.zip |
[X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics.
This patch removes the clang builtins and their use in the sse2/avx headers - a companion patch will remove/auto-upgrade the llvm intrinsics.
Note: We already did this for SSE41 PMOVSX sometime ago.
Differential Revision: http://reviews.llvm.org/D20684
llvm-svn: 271106
Diffstat (limited to 'clang/test/CodeGen')
-rw-r--r-- | clang/test/CodeGen/avx2-builtins.c | 30 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 6 | ||||
-rw-r--r-- | clang/test/CodeGen/sse41-builtins.c | 18 |
3 files changed, 30 insertions, 24 deletions
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index 20df26bed9d..f7d850fe149 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -292,73 +292,79 @@ __m256i test_mm256_cmpgt_epi64(__m256i a, __m256i b) { __m256i test_mm256_cvtepi8_epi16(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi16 - // CHECK: call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %{{.*}}) + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepi8_epi16(a); } __m256i test_mm256_cvtepi8_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: sext <8 x i8> %{{.*}} to <8 x i32> return _mm256_cvtepi8_epi32(a); } __m256i test_mm256_cvtepi8_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: sext <4 x i8> %{{.*}} to <4 x i64> return _mm256_cvtepi8_epi64(a); } __m256i test_mm256_cvtepi16_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi16_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %{{.*}}) + // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> return _mm256_cvtepi16_epi32(a); } __m256i test_mm256_cvtepi16_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi16_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %{{.*}}) + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: sext <4 x i16> %{{.*}} to <4 x i64> return _mm256_cvtepi16_epi64(a); } __m256i test_mm256_cvtepi32_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi32_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %{{.*}}) + // CHECK: sext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepi32_epi64(a); } __m256i test_mm256_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi16 - // CHECK: call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %{{.*}}) + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepu8_epi16(a); } __m256i test_mm256_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: zext <8 x i8> %{{.*}} to <8 x i32> return _mm256_cvtepu8_epi32(a); } __m256i test_mm256_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: zext <4 x i8> %{{.*}} to <4 x i64> return _mm256_cvtepu8_epi64(a); } __m256i test_mm256_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu16_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %{{.*}}) + // CHECK: zext <8 x i16> {{.*}} to <8 x i32> return _mm256_cvtepu16_epi32(a); } __m256i test_mm256_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu16_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %{{.*}}) + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: zext <4 x i16> %{{.*}} to <4 x i64> return _mm256_cvtepu16_epi64(a); } __m256i test_mm256_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu32_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %{{.*}}) + // CHECK: zext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepu32_epi64(a); } diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 8b53c631544..8cc88053bc3 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -387,12 +387,6 @@ void f0() { tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i); tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i); tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s); - tmp_V4i = __builtin_ia32_pmovzxbd128(tmp_V16c); - tmp_V2LLi = __builtin_ia32_pmovzxbq128(tmp_V16c); - tmp_V8s = __builtin_ia32_pmovzxbw128(tmp_V16c); - tmp_V2LLi = __builtin_ia32_pmovzxdq128(tmp_V4i); - tmp_V4i = __builtin_ia32_pmovzxwd128(tmp_V8s); - tmp_V2LLi = __builtin_ia32_pmovzxwq128(tmp_V8s); tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i); tmp_V4i = __builtin_ia32_pmulld128(tmp_V4i, tmp_V4i); tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16); diff --git a/clang/test/CodeGen/sse41-builtins.c b/clang/test/CodeGen/sse41-builtins.c index 76a9b659b57..0335659502b 100644 --- a/clang/test/CodeGen/sse41-builtins.c +++ b/clang/test/CodeGen/sse41-builtins.c @@ -119,37 +119,43 @@ __m128i test_mm_cvtepi32_epi64(__m128i a) { __m128i test_mm_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: zext <8 x i8> {{.*}} to <8 x i16> return _mm_cvtepu8_epi16(a); } __m128i test_mm_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: zext <4 x i8> {{.*}} to <4 x i32> return _mm_cvtepu8_epi32(a); } __m128i test_mm_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: zext <2 x i8> {{.*}} to <2 x i64> return _mm_cvtepu8_epi64(a); } __m128i test_mm_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: zext <4 x i16> {{.*}} to <4 x i32> return _mm_cvtepu16_epi32(a); } __m128i test_mm_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: zext <2 x i16> {{.*}} to <2 x i64> return _mm_cvtepu16_epi64(a); } __m128i test_mm_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu32_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}}) + // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: zext <2 x i32> {{.*}} to <2 x i64> return _mm_cvtepu32_epi64(a); } |