summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 99
1 files changed, 98 insertions, 1 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6a2f2b0a4a1..6b2484b8c8b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8449,6 +8449,76 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+// Emits IR for a saturating vector add (IsAddition) or subtract, in signed or
+// unsigned form (Signed). Ops[0]/Ops[1] are the operands; a 4-arg call is masked.
+static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E,
+ SmallVectorImpl<Value *> &Ops,
+ bool IsAddition, bool Signed) {
+
+ // Result vector type of the call expression and its element count.
+ llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ int NumElements = ResultType->getVectorNumElements();
+ Value *Res;
+ if (!IsAddition && !Signed) { // Unsigned subtract: max(a, b) - b, which is 0 whenever a <= b.
+ Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
+ Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
+ Res = CGF.Builder.CreateSub(Select, Ops[1]);
+ } else { // All other forms: compute in a wider element type, then clamp.
+ unsigned EltSizeInBits = ResultType->getScalarSizeInBits();
+ llvm::Type *ExtElementType = EltSizeInBits == 8 ?
+ CGF.Builder.getInt16Ty() :
+ CGF.Builder.getInt32Ty(); // i16 for 8-bit elements, i32 for everything else.
+
+ // Widen both operands (sign- or zero-extending to match Signed) so that the
+ // sum or difference of 8- or 16-bit elements cannot wrap.
+ llvm::Type *ExtType = llvm::VectorType::get(ExtElementType, NumElements);
+ Value *VecA = Signed ? CGF.Builder.CreateSExt(Ops[0], ExtType)
+ : CGF.Builder.CreateZExt(Ops[0], ExtType);
+ Value *VecB = Signed ? CGF.Builder.CreateSExt(Ops[1], ExtType)
+ : CGF.Builder.CreateZExt(Ops[1], ExtType);
+
+ llvm::Value *ExtProduct = IsAddition ? CGF.Builder.CreateAdd(VecA, VecB)
+ : CGF.Builder.CreateSub(VecA, VecB); // Despite the name, a sum or difference.
+
+ // Build the largest value of the result element type (signed max, or
+ // all-ones when unsigned) and extend it to the wide type for comparison.
+ APInt SignedMaxValue =
+ llvm::APInt::getSignedMaxValue(EltSizeInBits);
+ Value *Max = Signed ? llvm::ConstantInt::get(ResultType, SignedMaxValue)
+ : llvm::Constant::getAllOnesValue(ResultType);
+ Value *ExtMaxVec = Signed ? CGF.Builder.CreateSExt(Max, ExtType)
+ : CGF.Builder.CreateZExt(Max, ExtType);
+ // Clamp from above: elements whose wide result exceeds the maximum are
+ // replaced by that maximum.
+ ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ Value *Cmp = CGF.Builder.CreateICmp(Pred, ExtProduct,
+ ExtMaxVec); // True where the wide result is <= max.
+ Value *SaturatedProduct = CGF.Builder.CreateSelect(
+ Cmp, ExtProduct, ExtMaxVec); // Keep in-range results, else take max.
+
+ if (Signed) { // Signed forms are additionally clamped from below.
+ APInt SignedMinValue =
+ llvm::APInt::getSignedMinValue(EltSizeInBits);
+ Value *Min = llvm::ConstantInt::get(ResultType, SignedMinValue);
+ Value *ExtMinVec = CGF.Builder.CreateSExt(Min, ExtType);
+ Value *IsNegative =
+ CGF.Builder.CreateICmp(ICmpInst::ICMP_SLT, SaturatedProduct, ExtMinVec); // True where below signed min.
+ SaturatedProduct =
+ CGF.Builder.CreateSelect(IsNegative, ExtMinVec, SaturatedProduct);
+ }
+
+ Res = CGF.Builder.CreateTrunc(SaturatedProduct,
+ ResultType); // Narrow the clamped value back to the result type.
+ }
+ if (E->getNumArgs() == 4) { // Masked intrinsics: Ops[2] is the source vector, Ops[3] the mask.
+ Value *VecSRC = Ops[2];
+ Value *Mask = Ops[3];
+ return EmitX86Select(CGF, Mask, Res, VecSRC); // Presumably Res where the mask is set, VecSRC elsewhere -- confirm in EmitX86Select.
+ }
+
+ return Res;
+}
+
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -9516,10 +9586,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Load->setVolatile(true);
return Load;
}
+ case X86::BI__builtin_ia32_paddusb512_mask:
+ case X86::BI__builtin_ia32_paddusw512_mask:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, true, false); // Add, unsigned.
+ case X86::BI__builtin_ia32_paddsb512_mask:
+ case X86::BI__builtin_ia32_paddsw512_mask:
+ case X86::BI__builtin_ia32_paddsb256:
+ case X86::BI__builtin_ia32_paddsw256:
+ case X86::BI__builtin_ia32_paddsb128:
+ case X86::BI__builtin_ia32_paddsw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, true, true); // Add, signed.
+ case X86::BI__builtin_ia32_psubusb512_mask:
+ case X86::BI__builtin_ia32_psubusw512_mask:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, false, false); // Sub, unsigned.
+ case X86::BI__builtin_ia32_psubsb512_mask:
+ case X86::BI__builtin_ia32_psubsw512_mask:
+ case X86::BI__builtin_ia32_psubsb256:
+ case X86::BI__builtin_ia32_psubsw256:
+ case X86::BI__builtin_ia32_psubsb128:
+ case X86::BI__builtin_ia32_psubsw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, false, true); // Sub, signed.
}
}
-
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<Value*, 4> Ops;
OpenPOWER on IntegriCloud