diff options
-rw-r--r-- | clang/lib/Headers/emmintrin.h | 12 | ||||
-rw-r--r-- | clang/lib/Headers/xmmintrin.h | 16 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-cmp-builtins.c | 48 |
3 files changed, 68 insertions, 8 deletions
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 56c6c228554..f965dce2d52 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -245,13 +245,15 @@ _mm_cmple_sd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) @@ -287,13 +289,15 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + return (__m128d) { __c[0], __a[1] }; } static __inline__ int __attribute__((__always_inline__, __nodebug__)) diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index b3b23cb7d0d..c68d3ed7b67 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -218,7 +218,9 @@ _mm_cmple_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 1); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 1), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -230,7 +232,9 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 2); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 2), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -278,7 +282,9 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 5); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 5), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -290,7 +296,9 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 6); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 6), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) diff --git a/clang/test/CodeGen/avx-cmp-builtins.c b/clang/test/CodeGen/avx-cmp-builtins.c index 1ac1c31e2ad..5b205d79d0c 100644 --- a/clang/test/CodeGen/avx-cmp-builtins.c +++ b/clang/test/CodeGen/avx-cmp-builtins.c @@ -44,3 +44,51 @@ __m128d test_cmp_ss(__m128 a, __m128 b) { // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13) return _mm_cmp_ss(a, b, _CMP_GE_OS); } + +__m128 test_cmpgt_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpgt_ss(a, b); +} + +__m128 test_cmpge_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpge_ss(a, b); +} + +__m128 test_cmpngt_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpngt_ss(a, b); +} + +__m128 test_cmpnge_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpnge_ss(a, b); +} + +__m128d test_cmpgt_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpgt_sd(a, b); +} + +__m128d test_cmpge_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpge_sd(a, b); +} + +__m128d test_cmpngt_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpngt_sd(a, b); +} + +__m128d test_cmpnge_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpnge_sd(a, b); +} |