summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp45
-rw-r--r--clang/test/CodeGen/avx2-builtins.c12
2 files changed, 54 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index dc0b164e6ab..df5ca3a1162 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7289,6 +7289,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
case X86::BI__builtin_ia32_cmpordpd:
return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ // If this one of the SSE immediates, we can use native IR.
+ if (CC < 8) {
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0: Pred = FCmpInst::FCMP_OEQ; break;
+ case 1: Pred = FCmpInst::FCMP_OLT; break;
+ case 2: Pred = FCmpInst::FCMP_OLE; break;
+ case 3: Pred = FCmpInst::FCMP_UNO; break;
+ case 4: Pred = FCmpInst::FCMP_UNE; break;
+ case 5: Pred = FCmpInst::FCMP_UGE; break;
+ case 6: Pred = FCmpInst::FCMP_UGT; break;
+ case 7: Pred = FCmpInst::FCMP_ORD; break;
+ }
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ auto *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+ Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+ return Builder.CreateBitCast(Sext, FPVecTy);
+ }
+
+ // We can't handle 8-31 immediates with native IR, use the intrinsic.
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cmpps:
+ ID = Intrinsic::x86_sse_cmp_ps;
+ break;
+ case X86::BI__builtin_ia32_cmpps256:
+ ID = Intrinsic::x86_avx_cmp_ps_256;
+ break;
+ case X86::BI__builtin_ia32_cmppd:
+ ID = Intrinsic::x86_sse2_cmp_pd;
+ break;
+ case X86::BI__builtin_ia32_cmppd256:
+ ID = Intrinsic::x86_avx_cmp_pd_256;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
// SSE scalar comparison intrinsics
case X86::BI__builtin_ia32_cmpeqss:
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c
index b0deb47e6ec..1e712453733 100644
--- a/clang/test/CodeGen/avx2-builtins.c
+++ b/clang/test/CodeGen/avx2-builtins.c
@@ -488,7 +488,9 @@ __m128d test_mm_mask_i32gather_pd(__m128d a, double const *b, __m128i c, __m128d
__m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm256_i32gather_pd
- // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
// CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
return _mm256_i32gather_pd(b, c, 2);
}
@@ -516,7 +518,9 @@ __m128 test_mm_mask_i32gather_ps(__m128 a, float const *b, __m128i c, __m128 d)
__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i32gather_ps
- // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
// CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
return _mm256_i32gather_ps(b, c, 2);
}
@@ -592,7 +596,9 @@ __m128d test_mm_mask_i64gather_pd(__m128d a, double const *b, __m128i c, __m128d
__m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_pd
- // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
// CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
return _mm256_i64gather_pd(b, c, 2);
}
OpenPOWER on IntegriCloud