diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 152 | ||||
-rw-r--r-- | clang/lib/Headers/emmintrin.h | 48 | ||||
-rw-r--r-- | clang/lib/Headers/xmmintrin.h | 48 |
3 files changed, 198 insertions, 50 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index fca60d50c56..1665460ee82 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5986,8 +5986,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { - const char *name = nullptr; - Intrinsic::ID ID = Intrinsic::not_intrinsic; + const char *name; + Intrinsic::ID ID; switch(BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_pswapdsf: @@ -6041,6 +6041,154 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); } + // SSE comparison intrinsics + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case 
X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + // These exist so that the builtin that takes an immediate can be bounds + // checked by clang to avoid passing bad immediates to the backend. Since + // AVX has a larger immediate than SSE we would need separate builtins to + // do the different bounds checking. Rather than create a clang specific + // SSE only builtin, this implements eight separate builtins to match gcc + // implementation. + + // Choose the immediate. + unsigned Imm; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpeqsd: + Imm = 0; + break; + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmpltsd: + Imm = 1; + break; + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmplesd: + Imm = 2; + break; + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpunordsd: + Imm = 3; + break; + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpneqsd: + Imm = 4; + break; + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnltsd: + Imm = 5; + break; + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpnlesd: + Imm = 6; + break; + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpordpd: + case 
X86::BI__builtin_ia32_cmpordsd: + Imm = 7; + break; + } + + // Choose the intrinsic ID. + const char *name; + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpordps: + name = "cmpps"; + ID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + name = "cmpss"; + ID = Intrinsic::x86_sse_cmp_ss; + break; + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpordpd: + name = "cmppd"; + ID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + name = "cmpsd"; + ID = Intrinsic::x86_sse2_cmp_sd; + break; + } + + Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, name); } } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index b3f8569524b..28d004309cf 100644 --- a/clang/lib/Headers/emmintrin.h +++ 
b/clang/lib/Headers/emmintrin.h @@ -155,148 +155,148 @@ _mm_xor_pd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 1); + return (__m128d)__builtin_ia32_cmpltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 2); + return (__m128d)__builtin_ia32_cmplepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_pd(__m128d __a, __m128d __b) { - 
return (__m128d)__builtin_ia32_cmppd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + __m128d __c = __builtin_ia32_cmpltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + __m128d __c = __builtin_ia32_cmplesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d 
__attribute__((__always_inline__, __nodebug__)) _mm_cmpord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index c9befcb456f..d1afe81601c 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -182,153 +182,153 @@ _mm_xor_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ss(__m128 __a, __m128 __b) { - return 
(__m128)__builtin_ia32_cmpss(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 2); + return (__m128)__builtin_ia32_cmpless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 2); + return (__m128)__builtin_ia32_cmpleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 1), + __builtin_ia32_cmpltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 1); + return (__m128)__builtin_ia32_cmpltps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 2), + __builtin_ia32_cmpless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ps(__m128 __a, __m128 __b) { - 
return (__m128)__builtin_ia32_cmpps(__b, __a, 2); + return (__m128)__builtin_ia32_cmpleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 5), + __builtin_ia32_cmpnltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 5); + return (__m128)__builtin_ia32_cmpnltps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ss(__m128 __a, __m128 __b) { 
return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 6), + __builtin_ia32_cmpnless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 6); + return (__m128)__builtin_ia32_cmpnleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 3); + return (__m128)__builtin_ia32_cmpunordss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 3); + return (__m128)__builtin_ia32_cmpunordps(__a, __b); } static __inline__ int __attribute__((__always_inline__, __nodebug__)) |