diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGExpr.cpp | 33 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGExprScalar.cpp | 210 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 8 |
3 files changed, 205 insertions, 46 deletions
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6808c69cd8c..84ce896506d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3002,9 +3002,10 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, llvm::Value *ptr, ArrayRef<llvm::Value*> indices, bool inbounds, + SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.Builder.CreateInBoundsGEP(ptr, indices, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, loc, name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } @@ -3035,8 +3036,9 @@ static QualType getFixedSizeElementType(const ASTContext &ctx, } static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, - ArrayRef<llvm::Value*> indices, + ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, + SourceLocation loc, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. #ifndef NDEBUG @@ -3057,7 +3059,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); llvm::Value *eltPtr = - emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, name); + emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, loc, name); return Address(eltPtr, eltAlign); } @@ -3110,7 +3112,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Address Addr = EmitExtVectorElementLValue(LV); QualType EltType = LV.getType()->castAs<VectorType>()->getElementType(); - Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true); + Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true, + E->getExprLoc()); return MakeAddrLValue(Addr, EltType, LV.getBaseInfo()); } @@ -3138,7 +3141,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, } Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); } else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){ // Indexing over an interface, as in "NSString *P; P[4];" @@ -3163,8 +3167,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // Do the GEP. CharUnits EltAlign = getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize); - llvm::Value *EltPtr = - emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false); + llvm::Value *EltPtr = emitArraySubscriptGEP( + *this, Addr.getPointer(), ScaledIdx, false, E->getExprLoc()); Addr = Address(EltPtr, EltAlign); // Cast back. @@ -3189,14 +3193,16 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, E->getType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); } else { // The base must be a pointer; emit it with an estimate of its alignment. Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); } LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo); @@ -3368,7 +3374,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, else Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3387,13 +3394,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, // Propagate the alignment from the array itself to the result. EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, - ResultExprTy, !getLangOpts().isSignedOverflowDefined()); + ResultExprTy, !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); } else { Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy, ResultExprTy, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + E->getExprLoc()); } return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 048b50d8261..d604b4130a2 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -44,6 +45,43 @@ using llvm::Value; //===----------------------------------------------------------------------===// namespace { + +/// Determine whether the given binary operation may overflow. +/// Sets \p Result to the value of the operation for BO_Add, BO_Sub, BO_Mul, +/// and signed BO_{Div,Rem}. For these opcodes, and for unsigned BO_{Div,Rem}, +/// the returned overflow check is precise. The returned value is 'true' for +/// all other opcodes, to be conservative. +bool mayHaveIntegerOverflow(llvm::ConstantInt *LHS, llvm::ConstantInt *RHS, + BinaryOperator::Opcode Opcode, bool Signed, + llvm::APInt &Result) { + // Assume overflow is possible, unless we can prove otherwise. + bool Overflow = true; + const auto &LHSAP = LHS->getValue(); + const auto &RHSAP = RHS->getValue(); + if (Opcode == BO_Add) { + if (Signed) + Result = LHSAP.sadd_ov(RHSAP, Overflow); + else + Result = LHSAP.uadd_ov(RHSAP, Overflow); + } else if (Opcode == BO_Sub) { + if (Signed) + Result = LHSAP.ssub_ov(RHSAP, Overflow); + else + Result = LHSAP.usub_ov(RHSAP, Overflow); + } else if (Opcode == BO_Mul) { + if (Signed) + Result = LHSAP.smul_ov(RHSAP, Overflow); + else + Result = LHSAP.umul_ov(RHSAP, Overflow); + } else if (Opcode == BO_Div || Opcode == BO_Rem) { + if (Signed && !RHS->isZero()) + Result = LHSAP.sdiv_ov(RHSAP, Overflow); + else + return false; + } + return Overflow; +} + struct BinOpInfo { Value *LHS; Value *RHS; @@ -55,37 +93,14 @@ struct BinOpInfo { /// Check if the binop can result in integer overflow. bool mayHaveIntegerOverflow() const { // Without constant input, we can't rule out overflow. - const auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS); - const auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS); + auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS); + auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS); if (!LHSCI || !RHSCI) return true; - // Assume overflow is possible, unless we can prove otherwise. - bool Overflow = true; - const auto &LHSAP = LHSCI->getValue(); - const auto &RHSAP = RHSCI->getValue(); - if (Opcode == BO_Add) { - if (Ty->hasSignedIntegerRepresentation()) - (void)LHSAP.sadd_ov(RHSAP, Overflow); - else - (void)LHSAP.uadd_ov(RHSAP, Overflow); - } else if (Opcode == BO_Sub) { - if (Ty->hasSignedIntegerRepresentation()) - (void)LHSAP.ssub_ov(RHSAP, Overflow); - else - (void)LHSAP.usub_ov(RHSAP, Overflow); - } else if (Opcode == BO_Mul) { - if (Ty->hasSignedIntegerRepresentation()) - (void)LHSAP.smul_ov(RHSAP, Overflow); - else - (void)LHSAP.umul_ov(RHSAP, Overflow); - } else if (Opcode == BO_Div || Opcode == BO_Rem) { - if (Ty->hasSignedIntegerRepresentation() && !RHSCI->isZero()) - (void)LHSAP.sdiv_ov(RHSAP, Overflow); - else - return false; - } - return Overflow; + llvm::APInt Result; + return ::mayHaveIntegerOverflow( + LHSCI, RHSCI, Opcode, Ty->hasSignedIntegerRepresentation(), Result); } /// Check if the binop computes a division or a remainder. @@ -1925,7 +1940,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = Builder.CreateInBoundsGEP(value, numElts, "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP(value, numElts, E->getExprLoc(), + "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1935,7 +1951,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. @@ -1944,7 +1961,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. @@ -2025,7 +2043,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = Builder.CreateInBoundsGEP(value, sizeValue, "incdec.objptr"); + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, E->getExprLoc(), + "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -2692,7 +2711,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr"); } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + pointer = CGF.EmitCheckedInBoundsGEP(pointer, index, op.E->getExprLoc(), + "add.ptr"); } return pointer; } @@ -2709,7 +2729,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + return CGF.EmitCheckedInBoundsGEP(pointer, index, op.E->getExprLoc(), + "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and @@ -3824,3 +3845,124 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( llvm_unreachable("Unhandled compound assignment operator"); } + +Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, + ArrayRef<Value *> IdxList, + SourceLocation Loc, + const Twine &Name) { + Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + // If the GEP has already been reduced to a constant, leave it be. + if (isa<llvm::Constant>(GEPVal)) + return GEPVal; + + // Only check for overflows in the default address space. + if (GEPVal->getType()->getPointerAddressSpace()) + return GEPVal; + + auto *GEP = cast<llvm::GEPOperator>(GEPVal); + assert(GEP->isInBounds() && "Expected inbounds GEP"); + + SanitizerScope SanScope(this); + auto &VMContext = getLLVMContext(); + const auto &DL = CGM.getDataLayout(); + auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); + + // Grab references to the signed add/mul overflow intrinsics for intptr_t. + auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); + auto *SAddIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::sadd_with_overflow, IntPtrTy); + auto *SMulIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); + + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; + // The offset overflow flag - true if the total offset overflows. + llvm::Value *OffsetOverflows = Builder.getFalse(); + + /// Return the result of the given binary operation. + auto eval = [&](BinaryOperator::Opcode Opcode, llvm::Value *LHS, + llvm::Value *RHS) -> llvm::Value * { + assert(Opcode == BO_Add || Opcode == BO_Mul && "Can't eval binop"); + + // If the operands are constants, return a constant result. + if (auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS)) { + if (auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS)) { + llvm::APInt N; + bool HasOverflow = mayHaveIntegerOverflow(LHSCI, RHSCI, Opcode, + /*Signed=*/true, N); + if (HasOverflow) + OffsetOverflows = Builder.getTrue(); + return llvm::ConstantInt::get(VMContext, N); + } + } + + // Otherwise, compute the result with checked arithmetic. + auto *ResultAndOverflow = Builder.CreateCall( + (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS}); + OffsetOverflows = Builder.CreateOr( + OffsetOverflows, Builder.CreateExtractValue(ResultAndOverflow, 1)); + return Builder.CreateExtractValue(ResultAndOverflow, 0); + }; + + // Determine the total byte offset by looking at each GEP operand. + for (auto GTI = llvm::gep_type_begin(GEP), GTE = llvm::gep_type_end(GEP); + GTI != GTE; ++GTI) { + llvm::Value *LocalOffset; + auto *Index = GTI.getOperand(); + // Compute the local offset contributed by this indexing step: + if (auto *STy = GTI.getStructTypeOrNull()) { + // For struct indexing, the local offset is the byte position of the + // specified field. + unsigned FieldNo = cast<llvm::ConstantInt>(Index)->getZExtValue(); + LocalOffset = llvm::ConstantInt::get( + IntPtrTy, DL.getStructLayout(STy)->getElementOffset(FieldNo)); + } else { + // Otherwise this is array-like indexing. The local offset is the index + // multiplied by the element size. + auto *ElementSize = llvm::ConstantInt::get( + IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType())); + auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true); + LocalOffset = eval(BO_Mul, ElementSize, IndexS); + } + + // If this is the first offset, set it as the total offset. Otherwise, add + // the local offset into the running total. + if (!TotalOffset || TotalOffset == Zero) + TotalOffset = LocalOffset; + else + TotalOffset = eval(BO_Add, TotalOffset, LocalOffset); + } + + // Common case: if the total offset is zero, don't emit a check. + if (TotalOffset == Zero) + return GEPVal; + + // Now that we've computed the total offset, add it to the base pointer (with + // wrapping semantics). + auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); + auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset); + + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. + llvm::Value *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); + llvm::Value *ValidGEP = Builder.CreateAnd( + Builder.CreateNot(OffsetOverflows), + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid)); + + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; + // Pass the computed GEP to the runtime to avoid emitting poisoned arguments. + llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; + EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow), + SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); + + return GEPVal; +} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 526ef9a1e57..42ffd0d3efc 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -120,6 +120,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \ SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \ SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \ + SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \ SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \ SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \ SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \ @@ -3551,6 +3552,13 @@ public: /// nonnull, if \p LHS is marked _Nonnull. void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to + /// detect undefined behavior when the pointer overflow sanitizer is enabled. + llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, + ArrayRef<llvm::Value *> IdxList, + SourceLocation Loc, + const Twine &Name = ""); + /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); |