From d93620bf4d5e165999ad1c3cec2a2fdb02f56218 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Nov 2016 22:34:55 +0000 Subject: IR: Introduce inrange attribute on getelementptr indices. If the inrange keyword is present before any index, loading from or storing to any pointer derived from the getelementptr has undefined behavior if the load or store would access memory outside of the bounds of the element selected by the index marked as inrange. This can be used, e.g. for alias analysis or to split globals at element boundaries where beneficial. As previously proposed on llvm-dev: http://lists.llvm.org/pipermail/llvm-dev/2016-July/102472.html Differential Revision: https://reviews.llvm.org/D22793 llvm-svn: 286514 --- llvm/lib/Analysis/ConstantFolding.cpp | 40 +++++++++++++++++----- llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 31 +++++++++++------ llvm/lib/AsmParser/LLParser.h | 3 +- llvm/lib/AsmParser/LLToken.h | 1 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 22 ++++++++++--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 7 ++-- llvm/lib/IR/AsmWriter.cpp | 6 ++++ llvm/lib/IR/ConstantFold.cpp | 55 ++++++++++++++++++------------- llvm/lib/IR/ConstantFold.h | 7 ++-- llvm/lib/IR/Constants.cpp | 13 +++++--- 11 files changed, 129 insertions(+), 57 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index ec442cedac0..69e2e8ad20f 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -718,8 +718,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef Ops, - Type *ResultTy, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + Type *ResultTy, Optional InRangeIndex, + const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntPtrTy = DL.getIntPtrType(ResultTy); bool Any = false; @@ -742,7 +742,8 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef Ops, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr( + SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex); if (Constant *Folded = ConstantFoldConstant(C, DL, TLI)) C = Folded; @@ -771,13 +772,16 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI) { + const GEPOperator *InnermostGEP = GEP; + Type *SrcElemTy = GEP->getSourceElementType(); Type *ResElemTy = GEP->getResultElementType(); Type *ResTy = GEP->getType(); if (!SrcElemTy->isSized()) return nullptr; - if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, DL, TLI)) + if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, + GEP->getInRangeIndex(), DL, TLI)) return C; Constant *Ptr = Ops[0]; @@ -820,6 +824,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // If this is a GEP of a GEP, fold it all into a single GEP. while (auto *GEP = dyn_cast(Ptr)) { + InnermostGEP = GEP; + SmallVector NestedOps(GEP->op_begin() + 1, GEP->op_end()); // Do not try the incorporate the sub-GEP if some index is not a number. @@ -925,8 +931,23 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (Offset != 0) return nullptr; + // Preserve the inrange index from the innermost GEP if possible. We must + // have calculated the same indices up to and including the inrange index. + Optional InRangeIndex; + if (Optional LastIRIndex = InnermostGEP->getInRangeIndex()) + if (SrcElemTy == InnermostGEP->getSourceElementType() && + NewIdxs.size() > *LastIRIndex) { + InRangeIndex = LastIRIndex; + for (unsigned I = 0; I <= *LastIRIndex; ++I) + if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) { + InRangeIndex = None; + break; + } + } + // Create a GEP. - Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr( + SrcElemTy, Ptr, NewIdxs, /*InBounds=*/false, InRangeIndex); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -944,8 +965,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, /// attempting to fold instructions like loads and stores, which have no /// constant expression form. /// -/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc -/// information, due to only being passed an opcode and operands. Constant +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange +/// etc information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. /// Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, @@ -965,8 +986,9 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI)) return C; - return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), - Ops[0], Ops.slice(1)); + return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0], + Ops.slice(1), GEP->isInBounds(), + GEP->getInRangeIndex()); } if (auto *CE = dyn_cast(InstOrCE)) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 2dc1c0a1487..2ead37198b1 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -551,6 +551,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(nsw); KEYWORD(exact); KEYWORD(inbounds); + KEYWORD(inrange); KEYWORD(align); KEYWORD(addrspace); KEYWORD(section); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 2b8d2f81011..d73818d2b86 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -3175,7 +3175,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; } - if (ParseGlobalValueVector(Elts) || + Optional InRangeOp; + if (ParseGlobalValueVector( + Elts, Opc == Instruction::GetElementPtr ? &InRangeOp : nullptr) || ParseToken(lltok::rparen, "expected ')' in constantexpr")) return true; @@ -3214,8 +3216,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (!GetElementPtrInst::getIndexedType(Ty, Indices)) return Error(ID.Loc, "invalid getelementptr indices"); - ID.ConstantVal = - ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds); + + if (InRangeOp) { + if (*InRangeOp == 0) + return Error(ID.Loc, + "inrange keyword may not appear on pointer operand"); + --*InRangeOp; + } + + ID.ConstantVal = ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, + InBounds, InRangeOp); } else if (Opc == Instruction::Select) { if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to select"); @@ -3298,8 +3308,9 @@ bool LLParser::parseOptionalComdat(StringRef GlobalName, Comdat *&C) { /// ParseGlobalValueVector /// ::= /*empty*/ -/// ::= TypeAndValue (',' TypeAndValue)* -bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { +/// ::= [inrange] TypeAndValue (',' [inrange] TypeAndValue)* +bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts, + Optional *InRangeOp) { // Empty list. if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::rsquare || @@ -3307,14 +3318,14 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { Lex.getKind() == lltok::rparen) return false; - Constant *C; - if (ParseGlobalTypeAndValue(C)) return true; - Elts.push_back(C); + do { + if (InRangeOp && !*InRangeOp && EatIfPresent(lltok::kw_inrange)) + *InRangeOp = Elts.size(); - while (EatIfPresent(lltok::comma)) { + Constant *C; if (ParseGlobalTypeAndValue(C)) return true; Elts.push_back(C); - } + } while (EatIfPresent(lltok::comma)); return false; } diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h index 479ff96bc8a..16d4e8b5baa 100644 --- a/llvm/lib/AsmParser/LLParser.h +++ b/llvm/lib/AsmParser/LLParser.h @@ -411,7 +411,8 @@ namespace llvm { bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr); bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); - bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool ParseGlobalValueVector(SmallVectorImpl &Elts, + Optional *InRangeOp = nullptr); bool parseOptionalComdat(StringRef GlobalName, Comdat *&C); bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS); bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg, diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h index 555aee3ef04..a1695258bbe 100644 --- a/llvm/lib/AsmParser/LLToken.h +++ b/llvm/lib/AsmParser/LLToken.h @@ -103,6 +103,7 @@ enum Kind { kw_nsw, kw_exact, kw_inbounds, + kw_inrange, kw_align, kw_addrspace, kw_section, diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 23876ffcb99..d2c2ec774c1 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3268,12 +3268,25 @@ Error BitcodeReader::parseConstants() { } break; } - case bitc::CST_CODE_CE_INBOUNDS_GEP: - case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] + case bitc::CST_CODE_CE_INBOUNDS_GEP: // [ty, n x operands] + case bitc::CST_CODE_CE_GEP: // [ty, n x operands] + case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x + // operands] unsigned OpNum = 0; Type *PointeeType = nullptr; - if (Record.size() % 2) + if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX || + Record.size() % 2) PointeeType = getTypeByID(Record[OpNum++]); + + bool InBounds = false; + Optional InRangeIndex; + if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX) { + uint64_t Op = Record[OpNum++]; + InBounds = Op & 1; + InRangeIndex = Op >> 1; + } else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) + InBounds = true; + SmallVector Elts; while (OpNum != Record.size()) { Type *ElTy = getTypeByID(Record[OpNum++]); @@ -3294,8 +3307,7 @@ Error BitcodeReader::parseConstants() { ArrayRef Indices(Elts.begin() + 1, Elts.end()); V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, - BitCode == - bitc::CST_CODE_CE_INBOUNDS_GEP); + InBounds, InRangeIndex); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8de61bc0b7c..0a09b3cef91 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2217,9 +2217,12 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, case Instruction::GetElementPtr: { Code = bitc::CST_CODE_CE_GEP; const auto *GO = cast(C); - if (GO->isInBounds()) - Code = bitc::CST_CODE_CE_INBOUNDS_GEP; Record.push_back(VE.getTypeID(GO->getSourceElementType())); + if (Optional Idx = GO->getInRangeIndex()) { + Code = bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX; + Record.push_back((*Idx << 1) | GO->isInBounds()); + } else if (GO->isInBounds()) + Code = bitc::CST_CODE_CE_INBOUNDS_GEP; for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index f93739d14c7..7334c988f29 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1320,12 +1320,18 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, static_cast(CE->getPredicate())); Out << " ("; + Optional InRangeOp; if (const GEPOperator *GEP = dyn_cast(CE)) { TypePrinter.print(GEP->getSourceElementType(), Out); Out << ", "; + InRangeOp = GEP->getInRangeIndex(); + if (InRangeOp) + ++*InRangeOp; } for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { + if (InRangeOp && (OI - CE->op_begin()) == *InRangeOp) + Out << "inrange "; TypePrinter.print((*OI)->getType(), Out); Out << ' '; WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context); diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 7625619e02e..37bab3a0d28 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -545,7 +545,10 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, } else if (CE->getOpcode() == Instruction::GetElementPtr && // Do not fold addrspacecast (gep 0, .., 0). It might make the // addrspacecast uncanonicalized. - opc != Instruction::AddrSpaceCast) { + opc != Instruction::AddrSpaceCast && + // Do not fold bitcast (gep) with inrange index, as this loses + // information. + !cast(CE)->getInRangeIndex().hasValue()) { // If all of the indexes in the GEP are null values, there is no pointer // adjustment going on. We might as well cast the source pointer. bool isAllNull = true; @@ -2046,10 +2049,10 @@ static bool isIndexInRangeOfSequentialType(SequentialType *STy, return true; } -template -static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, - bool inBounds, - ArrayRef Idxs) { +Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, + bool InBounds, + Optional InRangeIndex, + ArrayRef Idxs) { if (Idxs.empty()) return C; Constant *Idx0 = cast(Idxs[0]); if ((Idxs.size() == 1 && Idx0->isNullValue())) @@ -2146,9 +2149,18 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, NewIndices.push_back(Combined); NewIndices.append(Idxs.begin() + 1, Idxs.end()); + + // The combined GEP normally inherits its index inrange attribute from + // the inner GEP, but if the inner GEP's last index was adjusted by the + // outer GEP, any inbounds attribute on that index is invalidated. + Optional IRIndex = cast(CE)->getInRangeIndex(); + if (IRIndex && *IRIndex == CE->getNumOperands() - 2 && !Idx0->isNullValue()) + IRIndex = None; + return ConstantExpr::getGetElementPtr( cast(CE)->getSourceElementType(), CE->getOperand(0), - NewIndices, inBounds && cast(CE)->isInBounds()); + NewIndices, InBounds && cast(CE)->isInBounds(), + IRIndex); } } @@ -2173,8 +2185,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, if (SrcArrayTy && DstArrayTy && SrcArrayTy->getElementType() == DstArrayTy->getElementType() && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace()) - return ConstantExpr::getGetElementPtr( - SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds); + return ConstantExpr::getGetElementPtr(SrcArrayTy, + (Constant *)CE->getOperand(0), + Idxs, InBounds, InRangeIndex); } } } @@ -2194,6 +2207,12 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, Unknown = true; continue; } + if (InRangeIndex && i == *InRangeIndex + 1) { + // If an index is marked inrange, we cannot apply this canonicalization to + // the following index, as that will cause the inrange index to point to + // the wrong element. + continue; + } if (isa(Ty)) { // The verify makes sure that GEPs into a struct are in range. continue; @@ -2256,27 +2275,17 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, if (!NewIdxs.empty()) { for (unsigned i = 0, e = Idxs.size(); i != e; ++i) if (!NewIdxs[i]) NewIdxs[i] = cast(Idxs[i]); - return ConstantExpr::getGetElementPtr(PointeeTy, C, NewIdxs, inBounds); + return ConstantExpr::getGetElementPtr(PointeeTy, C, NewIdxs, InBounds, + InRangeIndex); } // If all indices are known integers and normalized, we can do a simple // check for the "inbounds" property. - if (!Unknown && !inBounds) + if (!Unknown && !InBounds) if (auto *GV = dyn_cast(C)) if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs)) - return ConstantExpr::getInBoundsGetElementPtr(PointeeTy, C, Idxs); + return ConstantExpr::getGetElementPtr(PointeeTy, C, Idxs, + /*InBounds=*/true, InRangeIndex); return nullptr; } - -Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C, - bool inBounds, - ArrayRef Idxs) { - return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs); -} - -Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C, - bool inBounds, - ArrayRef Idxs) { - return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs); -} diff --git a/llvm/lib/IR/ConstantFold.h b/llvm/lib/IR/ConstantFold.h index 9b0a937c84d..2d8de1132b9 100644 --- a/llvm/lib/IR/ConstantFold.h +++ b/llvm/lib/IR/ConstantFold.h @@ -19,6 +19,8 @@ #ifndef LLVM_LIB_IR_CONSTANTFOLD_H #define LLVM_LIB_IR_CONSTANTFOLD_H +#include "llvm/ADT/Optional.h" + namespace llvm { template class ArrayRef; class Value; @@ -46,9 +48,8 @@ template class ArrayRef; Constant *V2); Constant *ConstantFoldCompareInstruction(unsigned short predicate, Constant *C1, Constant *C2); - Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds, - ArrayRef Idxs); - Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds, + Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds, + Optional InRangeIndex, ArrayRef Idxs); } // End llvm namespace diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 8138f8904b8..0e5fa248caa 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -1167,7 +1167,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, assert(SrcTy || (Ops[0]->getType() == getOperand(0)->getType())); return ConstantExpr::getGetElementPtr( SrcTy ? SrcTy : GEPO->getSourceElementType(), Ops[0], Ops.slice(1), - GEPO->isInBounds(), OnlyIfReducedTy); + GEPO->isInBounds(), GEPO->getInRangeIndex(), OnlyIfReducedTy); } case Instruction::ICmp: case Instruction::FCmp: @@ -1893,6 +1893,7 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2, Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, ArrayRef Idxs, bool InBounds, + Optional InRangeIndex, Type *OnlyIfReducedTy) { if (!Ty) Ty = cast(C->getType()->getScalarType())->getElementType(); @@ -1901,7 +1902,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Ty == cast(C->getType()->getScalarType())->getContainedType(0u)); - if (Constant *FC = ConstantFoldGetElementPtr(Ty, C, InBounds, Idxs)) + if (Constant *FC = + ConstantFoldGetElementPtr(Ty, C, InBounds, InRangeIndex, Idxs)) return FC; // Fold a few common cases. // Get the result type of the getelementptr! @@ -1937,9 +1939,12 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Idx = ConstantVector::getSplat(NumVecElts, Idx); ArgVec.push_back(Idx); } + + unsigned SubClassOptionalData = InBounds ? GEPOperator::IsInBounds : 0; + if (InRangeIndex && *InRangeIndex < 63) + SubClassOptionalData |= (*InRangeIndex + 1) << 1; const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0, - InBounds ? GEPOperator::IsInBounds : 0, None, - Ty); + SubClassOptionalData, None, Ty); LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ReqTy, Key); -- cgit v1.2.3