X86 intrinsics: cmpge|gt|nge|ngt_ss|_sd

These intrinsics should return the comparison result in the low bits and keep the high bits of the first source operand. Because the source operands are swapped when calling the builtin functions, the high bits of the second source operand are kept instead. To fix the issue, an extra shufflevector is used to take the high bits from the first source operand. rdar://14153896 llvm-svn: 184110
author: Manman Ren <mren@apple.com> 2013-06-17 19:42:49 +0000
committer: Manman Ren <mren@apple.com> 2013-06-17 19:42:49 +0000
commit: 9bb34d66b337fdb575d751ce289078323a9bd0c7 (patch)
tree: 0c5b0e12292b19ba72218927892d2205c2620960 /clang
parent: bb876654753c47984dd2e00b1a6f53373440ec46 (diff)
download: bcm5719-llvm-9bb34d66b337fdb575d751ce289078323a9bd0c7.tar.gz
bcm5719-llvm-9bb34d66b337fdb575d751ce289078323a9bd0c7.zip
3 files changed, 68 insertions, 8 deletions
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 56c6c228554..f965dce2d52 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -245,13 +245,15 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1);
+  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
+  return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2);
+  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
+  return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
@@ -287,13 +289,15 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5);
+  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
+  return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6);
+  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
+  return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index b3b23cb7d0d..c68d3ed7b67 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -218,7 +218,9 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__b, __a, 1);
+  return (__m128)__builtin_shufflevector(__a,
+                                         __builtin_ia32_cmpss(__b, __a, 1),
+                                         4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@@ -230,7 +232,9 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__b, __a, 2);
+  return (__m128)__builtin_shufflevector(__a,
+                                         __builtin_ia32_cmpss(__b, __a, 2),
+                                         4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@@ -278,7 +282,9 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__b, __a, 5);
+  return (__m128)__builtin_shufflevector(__a,
+                                         __builtin_ia32_cmpss(__b, __a, 5),
+                                         4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@@ -290,7 +296,9 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__b, __a, 6);
+  return (__m128)__builtin_shufflevector(__a,
+                                         __builtin_ia32_cmpss(__b, __a, 6),
+                                         4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/avx-cmp-builtins.c b/clang/test/CodeGen/avx-cmp-builtins.c
index 1ac1c31e2ad..5b205d79d0c 100644
--- a/clang/test/CodeGen/avx-cmp-builtins.c
+++ b/clang/test/CodeGen/avx-cmp-builtins.c
@@ -44,3 +44,51 @@ __m128d test_cmp_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13)
   return _mm_cmp_ss(a, b, _CMP_GE_OS);
 }
+
+__m128 test_cmpgt_ss(__m128 a, __m128 b) {
+  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
+  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  return _mm_cmpgt_ss(a, b);
+}
+
+__m128 test_cmpge_ss(__m128 a, __m128 b) {
+  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
+  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  return _mm_cmpge_ss(a, b);
+}
+
+__m128 test_cmpngt_ss(__m128 a, __m128 b) {
+  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
+  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  return _mm_cmpngt_ss(a, b);
+}
+
+__m128 test_cmpnge_ss(__m128 a, __m128 b) {
+  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
+  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  return _mm_cmpnge_ss(a, b);
+}
+
+__m128d test_cmpgt_sd(__m128d a, __m128d b) {
+  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1)
+  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_cmpgt_sd(a, b);
+}
+
+__m128d test_cmpge_sd(__m128d a, __m128d b) {
+  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2)
+  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_cmpge_sd(a, b);
+}
+
+__m128d test_cmpngt_sd(__m128d a, __m128d b) {
+  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5)
+  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_cmpngt_sd(a, b);
+}
+
+__m128d test_cmpnge_sd(__m128d a, __m128d b) {
+  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6)
+  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_cmpnge_sd(a, b);
+}
author	Manman Ren <mren@apple.com>	2013-06-17 19:42:49 +0000
committer	Manman Ren <mren@apple.com>	2013-06-17 19:42:49 +0000
commit	9bb34d66b337fdb575d751ce289078323a9bd0c7 (patch)
tree	0c5b0e12292b19ba72218927892d2205c2620960 /clang
parent	bb876654753c47984dd2e00b1a6f53373440ec46 (diff)
download	bcm5719-llvm-9bb34d66b337fdb575d751ce289078323a9bd0c7.tar.gz bcm5719-llvm-9bb34d66b337fdb575d751ce289078323a9bd0c7.zip