diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-07-04 07:09:46 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-07-04 07:09:46 +0000 |
| commit | ac1823f6e947ffee2f5171a59cc2afc175b66781 (patch) | |
| tree | 647532ee3a4a1b4d29eb325c0d6c723c5fbda99a | |
| parent | d83f818a3e5156f2a1bc7fb3207992acee42986f (diff) | |
| download | bcm5719-llvm-ac1823f6e947ffee2f5171a59cc2afc175b66781.tar.gz bcm5719-llvm-ac1823f6e947ffee2f5171a59cc2afc175b66781.zip | |
[AVX512] Modify what indices we emit for the zero vector we use for zero extension of the result of a v2i1 or v4i1 masked compare. This way we emit something that the backend easily interprets as a concatenation rather than a true shuffle. This delivers slightly better codegen with the current backend capabilities.
llvm-svn: 274484
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 2 | ||||
| -rw-r--r-- | clang/test/CodeGen/avx512vl-builtins.c | 3 |
2 files changed, 3 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8a5e2919f68..57628f0f44c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6512,7 +6512,7 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = NumElts; + Indices[i] = i % NumElts + NumElts; Cmp = CGF.Builder.CreateShuffleVector( Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 750c60c1231..18ceeb5f341 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -8,7 +8,7 @@ __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { // CHECK-LABEL: @test_mm_cmpeq_epu32_mask // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> + // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return (__mmask8)_mm_cmpeq_epu32_mask(__a, __b); } @@ -22,6 +22,7 @@ __mmask8 test_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { __mmask8 test_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { // CHECK-LABEL: @test_mm_cmpeq_epu64_mask // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // CHECK: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> return (__mmask8)_mm_cmpeq_epu64_mask(__a, __b); } |

