diff options
Diffstat (limited to 'clang/lib')
110 files changed, 50864 insertions, 0 deletions
diff --git a/clang/lib/AST/ASTConsumer.cpp b/clang/lib/AST/ASTConsumer.cpp new file mode 100644 index 00000000000..b3d12710927 --- /dev/null +++ b/clang/lib/AST/ASTConsumer.cpp @@ -0,0 +1,28 @@ +//===--- ASTConsumer.cpp - Abstract interface for reading ASTs --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ASTConsumer class. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/Decl.h" +using namespace clang; + +ASTConsumer::~ASTConsumer() {} + +void ASTConsumer::HandleTopLevelDeclaration(Decl* d) { + if (ScopedDecl* sd = dyn_cast<ScopedDecl>(d)) + while (sd) { + HandleTopLevelDecl(sd); + sd = sd->getNextDeclarator(); + } + else + HandleTopLevelDecl(d); +} diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp new file mode 100644 index 00000000000..db4d53aa481 --- /dev/null +++ b/clang/lib/AST/ASTContext.cpp @@ -0,0 +1,1853 @@ +//===--- ASTContext.cpp - Context to hold long-lived AST nodes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ASTContext interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; + +enum FloatingRank { + FloatRank, DoubleRank, LongDoubleRank +}; + +ASTContext::~ASTContext() { + // Deallocate all the types. + while (!Types.empty()) { + if (FunctionTypeProto *FT = dyn_cast<FunctionTypeProto>(Types.back())) { + // Destroy the object, but don't call delete. These are malloc'd. + FT->~FunctionTypeProto(); + free(FT); + } else { + delete Types.back(); + } + Types.pop_back(); + } +} + +void ASTContext::PrintStats() const { + fprintf(stderr, "*** AST Context Stats:\n"); + fprintf(stderr, " %d types total.\n", (int)Types.size()); + unsigned NumBuiltin = 0, NumPointer = 0, NumArray = 0, NumFunctionP = 0; + unsigned NumVector = 0, NumComplex = 0; + unsigned NumFunctionNP = 0, NumTypeName = 0, NumTagged = 0, NumReference = 0; + + unsigned NumTagStruct = 0, NumTagUnion = 0, NumTagEnum = 0, NumTagClass = 0; + unsigned NumObjCInterfaces = 0, NumObjCQualifiedInterfaces = 0; + unsigned NumObjCQualifiedIds = 0; + + for (unsigned i = 0, e = Types.size(); i != e; ++i) { + Type *T = Types[i]; + if (isa<BuiltinType>(T)) + ++NumBuiltin; + else if (isa<PointerType>(T)) + ++NumPointer; + else if (isa<ReferenceType>(T)) + ++NumReference; + else if (isa<ComplexType>(T)) + ++NumComplex; + else if (isa<ArrayType>(T)) + ++NumArray; + else if (isa<VectorType>(T)) + ++NumVector; + else if (isa<FunctionTypeNoProto>(T)) + ++NumFunctionNP; + else if (isa<FunctionTypeProto>(T)) + ++NumFunctionP; + else if (isa<TypedefType>(T)) + ++NumTypeName; + else if (TagType *TT = dyn_cast<TagType>(T)) { + ++NumTagged; + switch (TT->getDecl()->getKind()) { + default: assert(0 && "Unknown tagged 
type!"); + case Decl::Struct: ++NumTagStruct; break; + case Decl::Union: ++NumTagUnion; break; + case Decl::Class: ++NumTagClass; break; + case Decl::Enum: ++NumTagEnum; break; + } + } else if (isa<ObjCInterfaceType>(T)) + ++NumObjCInterfaces; + else if (isa<ObjCQualifiedInterfaceType>(T)) + ++NumObjCQualifiedInterfaces; + else if (isa<ObjCQualifiedIdType>(T)) + ++NumObjCQualifiedIds; + else { + QualType(T, 0).dump(); + assert(0 && "Unknown type!"); + } + } + + fprintf(stderr, " %d builtin types\n", NumBuiltin); + fprintf(stderr, " %d pointer types\n", NumPointer); + fprintf(stderr, " %d reference types\n", NumReference); + fprintf(stderr, " %d complex types\n", NumComplex); + fprintf(stderr, " %d array types\n", NumArray); + fprintf(stderr, " %d vector types\n", NumVector); + fprintf(stderr, " %d function types with proto\n", NumFunctionP); + fprintf(stderr, " %d function types with no proto\n", NumFunctionNP); + fprintf(stderr, " %d typename (typedef) types\n", NumTypeName); + fprintf(stderr, " %d tagged types\n", NumTagged); + fprintf(stderr, " %d struct types\n", NumTagStruct); + fprintf(stderr, " %d union types\n", NumTagUnion); + fprintf(stderr, " %d class types\n", NumTagClass); + fprintf(stderr, " %d enum types\n", NumTagEnum); + fprintf(stderr, " %d interface types\n", NumObjCInterfaces); + fprintf(stderr, " %d protocol qualified interface types\n", + NumObjCQualifiedInterfaces); + fprintf(stderr, " %d protocol qualified id types\n", + NumObjCQualifiedIds); + fprintf(stderr, "Total bytes = %d\n", int(NumBuiltin*sizeof(BuiltinType)+ + NumPointer*sizeof(PointerType)+NumArray*sizeof(ArrayType)+ + NumComplex*sizeof(ComplexType)+NumVector*sizeof(VectorType)+ + NumFunctionP*sizeof(FunctionTypeProto)+ + NumFunctionNP*sizeof(FunctionTypeNoProto)+ + NumTypeName*sizeof(TypedefType)+NumTagged*sizeof(TagType))); +} + + +void ASTContext::InitBuiltinType(QualType &R, BuiltinType::Kind K) { + Types.push_back((R = QualType(new BuiltinType(K),0)).getTypePtr()); +} + +void 
ASTContext::InitBuiltinTypes() { + assert(VoidTy.isNull() && "Context reinitialized?"); + + // C99 6.2.5p19. + InitBuiltinType(VoidTy, BuiltinType::Void); + + // C99 6.2.5p2. + InitBuiltinType(BoolTy, BuiltinType::Bool); + // C99 6.2.5p3. + if (Target.isCharSigned()) + InitBuiltinType(CharTy, BuiltinType::Char_S); + else + InitBuiltinType(CharTy, BuiltinType::Char_U); + // C99 6.2.5p4. + InitBuiltinType(SignedCharTy, BuiltinType::SChar); + InitBuiltinType(ShortTy, BuiltinType::Short); + InitBuiltinType(IntTy, BuiltinType::Int); + InitBuiltinType(LongTy, BuiltinType::Long); + InitBuiltinType(LongLongTy, BuiltinType::LongLong); + + // C99 6.2.5p6. + InitBuiltinType(UnsignedCharTy, BuiltinType::UChar); + InitBuiltinType(UnsignedShortTy, BuiltinType::UShort); + InitBuiltinType(UnsignedIntTy, BuiltinType::UInt); + InitBuiltinType(UnsignedLongTy, BuiltinType::ULong); + InitBuiltinType(UnsignedLongLongTy, BuiltinType::ULongLong); + + // C99 6.2.5p10. + InitBuiltinType(FloatTy, BuiltinType::Float); + InitBuiltinType(DoubleTy, BuiltinType::Double); + InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble); + + // C99 6.2.5p11. + FloatComplexTy = getComplexType(FloatTy); + DoubleComplexTy = getComplexType(DoubleTy); + LongDoubleComplexTy = getComplexType(LongDoubleTy); + + BuiltinVaListType = QualType(); + ObjCIdType = QualType(); + IdStructType = 0; + ObjCClassType = QualType(); + ClassStructType = 0; + + ObjCConstantStringType = QualType(); + + // void * type + VoidPtrTy = getPointerType(VoidTy); +} + +//===----------------------------------------------------------------------===// +// Type Sizing and Analysis +//===----------------------------------------------------------------------===// + +/// getTypeSize - Return the size of the specified type, in bits. This method +/// does not work on incomplete types. 
+std::pair<uint64_t, unsigned> +ASTContext::getTypeInfo(QualType T) { + T = T.getCanonicalType(); + uint64_t Width; + unsigned Align; + switch (T->getTypeClass()) { + case Type::TypeName: assert(0 && "Not a canonical type!"); + case Type::FunctionNoProto: + case Type::FunctionProto: + default: + assert(0 && "Incomplete types have no size!"); + case Type::VariableArray: + assert(0 && "VLAs not implemented yet!"); + case Type::ConstantArray: { + ConstantArrayType *CAT = cast<ConstantArrayType>(T); + + std::pair<uint64_t, unsigned> EltInfo = getTypeInfo(CAT->getElementType()); + Width = EltInfo.first*CAT->getSize().getZExtValue(); + Align = EltInfo.second; + break; + } + case Type::OCUVector: + case Type::Vector: { + std::pair<uint64_t, unsigned> EltInfo = + getTypeInfo(cast<VectorType>(T)->getElementType()); + Width = EltInfo.first*cast<VectorType>(T)->getNumElements(); + // FIXME: Vector alignment is not the alignment of its elements. + Align = EltInfo.second; + break; + } + + case Type::Builtin: + // FIXME: need to use TargetInfo to derive the target specific sizes. This + // implementation will suffice for play with vector support. 
+ switch (cast<BuiltinType>(T)->getKind()) { + default: assert(0 && "Unknown builtin type!"); + case BuiltinType::Void: + assert(0 && "Incomplete types have no size!"); + case BuiltinType::Bool: + Width = Target.getBoolWidth(); + Align = Target.getBoolAlign(); + break; + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::SChar: + Width = Target.getCharWidth(); + Align = Target.getCharAlign(); + break; + case BuiltinType::UShort: + case BuiltinType::Short: + Width = Target.getShortWidth(); + Align = Target.getShortAlign(); + break; + case BuiltinType::UInt: + case BuiltinType::Int: + Width = Target.getIntWidth(); + Align = Target.getIntAlign(); + break; + case BuiltinType::ULong: + case BuiltinType::Long: + Width = Target.getLongWidth(); + Align = Target.getLongAlign(); + break; + case BuiltinType::ULongLong: + case BuiltinType::LongLong: + Width = Target.getLongLongWidth(); + Align = Target.getLongLongAlign(); + break; + case BuiltinType::Float: + Width = Target.getFloatWidth(); + Align = Target.getFloatAlign(); + break; + case BuiltinType::Double: + Width = Target.getDoubleWidth(); + Align = Target.getDoubleAlign(); + break; + case BuiltinType::LongDouble: + Width = Target.getLongDoubleWidth(); + Align = Target.getLongDoubleAlign(); + break; + } + break; + case Type::ASQual: + // FIXME: Pointers into different addr spaces could have different sizes and + // alignment requirements: getPointerInfo should take an AddrSpace. + return getTypeInfo(QualType(cast<ASQualType>(T)->getBaseType(), 0)); + case Type::ObjCQualifiedId: + Width = Target.getPointerWidth(0); + Align = Target.getPointerAlign(0); + break; + case Type::Pointer: { + unsigned AS = cast<PointerType>(T)->getPointeeType().getAddressSpace(); + Width = Target.getPointerWidth(AS); + Align = Target.getPointerAlign(AS); + break; + } + case Type::Reference: + // "When applied to a reference or a reference type, the result is the size + // of the referenced type." 
C++98 5.3.3p2: expr.sizeof. + // FIXME: This is wrong for struct layout: a reference in a struct has + // pointer size. + return getTypeInfo(cast<ReferenceType>(T)->getReferenceeType()); + + case Type::Complex: { + // Complex types have the same alignment as their elements, but twice the + // size. + std::pair<uint64_t, unsigned> EltInfo = + getTypeInfo(cast<ComplexType>(T)->getElementType()); + Width = EltInfo.first*2; + Align = EltInfo.second; + break; + } + case Type::Tagged: + TagType *TT = cast<TagType>(T); + if (RecordType *RT = dyn_cast<RecordType>(TT)) { + const ASTRecordLayout &Layout = getASTRecordLayout(RT->getDecl()); + Width = Layout.getSize(); + Align = Layout.getAlignment(); + } else if (EnumDecl *ED = dyn_cast<EnumDecl>(TT->getDecl())) { + return getTypeInfo(ED->getIntegerType()); + } else { + assert(0 && "Unimplemented type sizes!"); + } + break; + } + + assert(Align && (Align & (Align-1)) == 0 && "Alignment must be power of 2"); + return std::make_pair(Width, Align); +} + +/// getASTRecordLayout - Get or compute information about the layout of the +/// specified record (struct/union/class), which indicates its size and field +/// position information. +const ASTRecordLayout &ASTContext::getASTRecordLayout(const RecordDecl *D) { + assert(D->isDefinition() && "Cannot get layout of forward declarations!"); + + // Look up this layout, if already laid out, return what we have. + const ASTRecordLayout *&Entry = ASTRecordLayouts[D]; + if (Entry) return *Entry; + + // Allocate and assign into ASTRecordLayouts here. The "Entry" reference can + // be invalidated (dangle) if the ASTRecordLayouts hashtable is inserted into. + ASTRecordLayout *NewEntry = new ASTRecordLayout(); + Entry = NewEntry; + + uint64_t *FieldOffsets = new uint64_t[D->getNumMembers()]; + uint64_t RecordSize = 0; + unsigned RecordAlign = 8; // Default alignment = 1 byte = 8 bits. 
+ + if (D->getKind() != Decl::Union) { + if (const AlignedAttr *AA = D->getAttr<AlignedAttr>()) + RecordAlign = std::max(RecordAlign, AA->getAlignment()); + + bool StructIsPacked = D->getAttr<PackedAttr>(); + + // Layout each field, for now, just sequentially, respecting alignment. In + // the future, this will need to be tweakable by targets. + for (unsigned i = 0, e = D->getNumMembers(); i != e; ++i) { + const FieldDecl *FD = D->getMember(i); + bool FieldIsPacked = StructIsPacked || FD->getAttr<PackedAttr>(); + uint64_t FieldSize; + unsigned FieldAlign; + + if (const Expr *BitWidthExpr = FD->getBitWidth()) { + llvm::APSInt I(32); + bool BitWidthIsICE = + BitWidthExpr->isIntegerConstantExpr(I, *this); + assert (BitWidthIsICE && "Invalid BitField size expression"); + FieldSize = I.getZExtValue(); + + std::pair<uint64_t, unsigned> TypeInfo = getTypeInfo(FD->getType()); + uint64_t TypeSize = TypeInfo.first; + + if (const AlignedAttr *AA = FD->getAttr<AlignedAttr>()) + FieldAlign = AA->getAlignment(); + else if (FieldIsPacked) + FieldAlign = 8; + else { + // FIXME: This is X86 specific, use 32-bit alignment for long long. + if (FD->getType()->isIntegerType() && TypeInfo.second > 32) + FieldAlign = 32; + else + FieldAlign = TypeInfo.second; + } + + // Check if we need to add padding to give the field the correct + // alignment. + if (RecordSize % FieldAlign + FieldSize > TypeSize) + RecordSize = (RecordSize+FieldAlign-1) & ~(FieldAlign-1); + + } else { + if (FD->getType()->isIncompleteType()) { + // This must be a flexible array member; we can't directly + // query getTypeInfo about these, so we figure it out here. + // Flexible array members don't have any size, but they + // have to be aligned appropriately for their element type. 
+ + if (const AlignedAttr *AA = FD->getAttr<AlignedAttr>()) + FieldAlign = AA->getAlignment(); + else if (FieldIsPacked) + FieldAlign = 8; + else { + const ArrayType* ATy = FD->getType()->getAsArrayType(); + FieldAlign = getTypeAlign(ATy->getElementType()); + } + FieldSize = 0; + } else { + std::pair<uint64_t, unsigned> FieldInfo = getTypeInfo(FD->getType()); + FieldSize = FieldInfo.first; + + if (const AlignedAttr *AA = FD->getAttr<AlignedAttr>()) + FieldAlign = AA->getAlignment(); + else if (FieldIsPacked) + FieldAlign = 8; + else + FieldAlign = FieldInfo.second; + } + + // Round up the current record size to the field's alignment boundary. + RecordSize = (RecordSize+FieldAlign-1) & ~(FieldAlign-1); + } + + // Place this field at the current location. + FieldOffsets[i] = RecordSize; + + // Reserve space for this field. + RecordSize += FieldSize; + + // Remember max struct/class alignment. + RecordAlign = std::max(RecordAlign, FieldAlign); + } + + // Finally, round the size of the total struct up to the alignment of the + // struct itself. + RecordSize = (RecordSize+RecordAlign-1) & ~(RecordAlign-1); + } else { + // Union layout just puts each member at the start of the record. + for (unsigned i = 0, e = D->getNumMembers(); i != e; ++i) { + const FieldDecl *FD = D->getMember(i); + std::pair<uint64_t, unsigned> FieldInfo = getTypeInfo(FD->getType()); + uint64_t FieldSize = FieldInfo.first; + unsigned FieldAlign = FieldInfo.second; + + // FIXME: This is X86 specific, use 32-bit alignment for long long. + if (FD->getType()->isIntegerType() && FieldAlign > 32) + FieldAlign = 32; + + // Round up the current record size to the field's alignment boundary. + RecordSize = std::max(RecordSize, FieldSize); + + // Place this field at the start of the record. + FieldOffsets[i] = 0; + + // Remember max struct/class alignment. 
+ RecordAlign = std::max(RecordAlign, FieldAlign); + } + } + + NewEntry->SetLayout(RecordSize, RecordAlign, FieldOffsets); + return *NewEntry; +} + +//===----------------------------------------------------------------------===// +// Type creation/memoization methods +//===----------------------------------------------------------------------===// + +QualType ASTContext::getASQualType(QualType T, unsigned AddressSpace) { + if (T.getCanonicalType().getAddressSpace() == AddressSpace) + return T; + + // Type's cannot have multiple ASQuals, therefore we know we only have to deal + // with CVR qualifiers from here on out. + assert(T.getCanonicalType().getAddressSpace() == 0 && + "Type is already address space qualified"); + + // Check if we've already instantiated an address space qual'd type of this + // type. + llvm::FoldingSetNodeID ID; + ASQualType::Profile(ID, T.getTypePtr(), AddressSpace); + void *InsertPos = 0; + if (ASQualType *ASQy = ASQualTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(ASQy, 0); + + // If the base type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getASQualType(T.getCanonicalType(), AddressSpace); + + // Get the new insert position for the node we care about. + ASQualType *NewIP = ASQualTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + ASQualType *New = new ASQualType(T.getTypePtr(), Canonical, AddressSpace); + ASQualTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, T.getCVRQualifiers()); +} + + +/// getComplexType - Return the uniqued reference to the type for a complex +/// number with the specified element type. +QualType ASTContext::getComplexType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. 
+ llvm::FoldingSetNodeID ID; + ComplexType::Profile(ID, T); + + void *InsertPos = 0; + if (ComplexType *CT = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(CT, 0); + + // If the pointee type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getComplexType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + ComplexType *New = new ComplexType(T, Canonical); + Types.push_back(New); + ComplexTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + + +/// getPointerType - Return the uniqued reference to the type for a pointer to +/// the specified type. +QualType ASTContext::getPointerType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. + llvm::FoldingSetNodeID ID; + PointerType::Profile(ID, T); + + void *InsertPos = 0; + if (PointerType *PT = PointerTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(PT, 0); + + // If the pointee type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getPointerType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + PointerType *New = new PointerType(T, Canonical); + Types.push_back(New); + PointerTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getReferenceType - Return the uniqued reference to the type for a reference +/// to the specified type. 
+QualType ASTContext::getReferenceType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. + llvm::FoldingSetNodeID ID; + ReferenceType::Profile(ID, T); + + void *InsertPos = 0; + if (ReferenceType *RT = ReferenceTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(RT, 0); + + // If the referencee type isn't canonical, this won't be a canonical type + // either, so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getReferenceType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + ReferenceType *NewIP = ReferenceTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + ReferenceType *New = new ReferenceType(T, Canonical); + Types.push_back(New); + ReferenceTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getConstantArrayType - Return the unique reference to the type for an +/// array of the specified element type. +QualType ASTContext::getConstantArrayType(QualType EltTy, + const llvm::APInt &ArySize, + ArrayType::ArraySizeModifier ASM, + unsigned EltTypeQuals) { + llvm::FoldingSetNodeID ID; + ConstantArrayType::Profile(ID, EltTy, ArySize); + + void *InsertPos = 0; + if (ConstantArrayType *ATP = + ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(ATP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!EltTy->isCanonical()) { + Canonical = getConstantArrayType(EltTy.getCanonicalType(), ArySize, + ASM, EltTypeQuals); + // Get the new insert position for the node we care about. 
+ ConstantArrayType *NewIP = + ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos); + + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + ConstantArrayType *New = new ConstantArrayType(EltTy, Canonical, ArySize, + ASM, EltTypeQuals); + ConstantArrayTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// getVariableArrayType - Returns a non-unique reference to the type for a +/// variable array of the specified element type. +QualType ASTContext::getVariableArrayType(QualType EltTy, Expr *NumElts, + ArrayType::ArraySizeModifier ASM, + unsigned EltTypeQuals) { + // Since we don't unique expressions, it isn't possible to unique VLA's + // that have an expression provided for their size. + + VariableArrayType *New = new VariableArrayType(EltTy, QualType(), NumElts, + ASM, EltTypeQuals); + + VariableArrayTypes.push_back(New); + Types.push_back(New); + return QualType(New, 0); +} + +QualType ASTContext::getIncompleteArrayType(QualType EltTy, + ArrayType::ArraySizeModifier ASM, + unsigned EltTypeQuals) { + llvm::FoldingSetNodeID ID; + IncompleteArrayType::Profile(ID, EltTy); + + void *InsertPos = 0; + if (IncompleteArrayType *ATP = + IncompleteArrayTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(ATP, 0); + + // If the element type isn't canonical, this won't be a canonical type + // either, so fill in the canonical type field. + QualType Canonical; + + if (!EltTy->isCanonical()) { + Canonical = getIncompleteArrayType(EltTy.getCanonicalType(), + ASM, EltTypeQuals); + + // Get the new insert position for the node we care about. 
+ IncompleteArrayType *NewIP = + IncompleteArrayTypes.FindNodeOrInsertPos(ID, InsertPos); + + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + IncompleteArrayType *New = new IncompleteArrayType(EltTy, Canonical, + ASM, EltTypeQuals); + + IncompleteArrayTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// getVectorType - Return the unique reference to a vector type of +/// the specified element type and size. VectorType must be a built-in type. +QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts) { + BuiltinType *baseType; + + baseType = dyn_cast<BuiltinType>(vecType.getCanonicalType().getTypePtr()); + assert(baseType != 0 && "getVectorType(): Expecting a built-in type"); + + // Check if we've already instantiated a vector of this type. + llvm::FoldingSetNodeID ID; + VectorType::Profile(ID, vecType, NumElts, Type::Vector); + void *InsertPos = 0; + if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(VTP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!vecType->isCanonical()) { + Canonical = getVectorType(vecType.getCanonicalType(), NumElts); + + // Get the new insert position for the node we care about. + VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + VectorType *New = new VectorType(vecType, NumElts, Canonical); + VectorTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// getOCUVectorType - Return the unique reference to an OCU vector type of +/// the specified element type and size. VectorType must be a built-in type. 
+QualType ASTContext::getOCUVectorType(QualType vecType, unsigned NumElts) { + BuiltinType *baseType; + + baseType = dyn_cast<BuiltinType>(vecType.getCanonicalType().getTypePtr()); + assert(baseType != 0 && "getOCUVectorType(): Expecting a built-in type"); + + // Check if we've already instantiated a vector of this type. + llvm::FoldingSetNodeID ID; + VectorType::Profile(ID, vecType, NumElts, Type::OCUVector); + void *InsertPos = 0; + if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(VTP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!vecType->isCanonical()) { + Canonical = getOCUVectorType(vecType.getCanonicalType(), NumElts); + + // Get the new insert position for the node we care about. + VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + OCUVectorType *New = new OCUVectorType(vecType, NumElts, Canonical); + VectorTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// getFunctionTypeNoProto - Return a K&R style C function type like 'int()'. +/// +QualType ASTContext::getFunctionTypeNoProto(QualType ResultTy) { + // Unique functions, to guarantee there is only one function of a particular + // structure. + llvm::FoldingSetNodeID ID; + FunctionTypeNoProto::Profile(ID, ResultTy); + + void *InsertPos = 0; + if (FunctionTypeNoProto *FT = + FunctionTypeNoProtos.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(FT, 0); + + QualType Canonical; + if (!ResultTy->isCanonical()) { + Canonical = getFunctionTypeNoProto(ResultTy.getCanonicalType()); + + // Get the new insert position for the node we care about. 
+ FunctionTypeNoProto *NewIP = + FunctionTypeNoProtos.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + FunctionTypeNoProto *New = new FunctionTypeNoProto(ResultTy, Canonical); + Types.push_back(New); + FunctionTypeNoProtos.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getFunctionType - Return a normal function type with a typed argument +/// list. isVariadic indicates whether the argument list includes '...'. +QualType ASTContext::getFunctionType(QualType ResultTy, QualType *ArgArray, + unsigned NumArgs, bool isVariadic) { + // Unique functions, to guarantee there is only one function of a particular + // structure. + llvm::FoldingSetNodeID ID; + FunctionTypeProto::Profile(ID, ResultTy, ArgArray, NumArgs, isVariadic); + + void *InsertPos = 0; + if (FunctionTypeProto *FTP = + FunctionTypeProtos.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(FTP, 0); + + // Determine whether the type being created is already canonical or not. + bool isCanonical = ResultTy->isCanonical(); + for (unsigned i = 0; i != NumArgs && isCanonical; ++i) + if (!ArgArray[i]->isCanonical()) + isCanonical = false; + + // If this type isn't canonical, get the canonical version of it. + QualType Canonical; + if (!isCanonical) { + llvm::SmallVector<QualType, 16> CanonicalArgs; + CanonicalArgs.reserve(NumArgs); + for (unsigned i = 0; i != NumArgs; ++i) + CanonicalArgs.push_back(ArgArray[i].getCanonicalType()); + + Canonical = getFunctionType(ResultTy.getCanonicalType(), + &CanonicalArgs[0], NumArgs, + isVariadic); + + // Get the new insert position for the node we care about. + FunctionTypeProto *NewIP = + FunctionTypeProtos.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + // FunctionTypeProto objects are not allocated with new because they have a + // variable size array (for parameter types) at the end of them. 
+ FunctionTypeProto *FTP = + (FunctionTypeProto*)malloc(sizeof(FunctionTypeProto) + + NumArgs*sizeof(QualType)); + new (FTP) FunctionTypeProto(ResultTy, ArgArray, NumArgs, isVariadic, + Canonical); + Types.push_back(FTP); + FunctionTypeProtos.InsertNode(FTP, InsertPos); + return QualType(FTP, 0); +} + +/// getTypedefType - Return the unique reference to the type for the +/// specified typename decl. +QualType ASTContext::getTypedefType(TypedefDecl *Decl) { + if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0); + + QualType Canonical = Decl->getUnderlyingType().getCanonicalType(); + Decl->TypeForDecl = new TypedefType(Type::TypeName, Decl, Canonical); + Types.push_back(Decl->TypeForDecl); + return QualType(Decl->TypeForDecl, 0); +} + +/// getObjCInterfaceType - Return the unique reference to the type for the +/// specified ObjC interface decl. +QualType ASTContext::getObjCInterfaceType(ObjCInterfaceDecl *Decl) { + if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0); + + Decl->TypeForDecl = new ObjCInterfaceType(Type::ObjCInterface, Decl); + Types.push_back(Decl->TypeForDecl); + return QualType(Decl->TypeForDecl, 0); +} + +/// getObjCQualifiedInterfaceType - Return a +/// ObjCQualifiedInterfaceType type for the given interface decl and +/// the conforming protocol list. 
+QualType ASTContext::getObjCQualifiedInterfaceType(ObjCInterfaceDecl *Decl, + ObjCProtocolDecl **Protocols, unsigned NumProtocols) { + llvm::FoldingSetNodeID ID; + ObjCQualifiedInterfaceType::Profile(ID, Protocols, NumProtocols); + + void *InsertPos = 0; + if (ObjCQualifiedInterfaceType *QT = + ObjCQualifiedInterfaceTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(QT, 0); + + // No Match; + ObjCQualifiedInterfaceType *QType = + new ObjCQualifiedInterfaceType(Decl, Protocols, NumProtocols); + Types.push_back(QType); + ObjCQualifiedInterfaceTypes.InsertNode(QType, InsertPos); + return QualType(QType, 0); +} + +/// getObjCQualifiedIdType - Return a +/// getObjCQualifiedIdType type for the 'id' decl and +/// the conforming protocol list. +QualType ASTContext::getObjCQualifiedIdType(QualType idType, + ObjCProtocolDecl **Protocols, + unsigned NumProtocols) { + llvm::FoldingSetNodeID ID; + ObjCQualifiedIdType::Profile(ID, Protocols, NumProtocols); + + void *InsertPos = 0; + if (ObjCQualifiedIdType *QT = + ObjCQualifiedIdTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(QT, 0); + + // No Match; + QualType Canonical; + if (!idType->isCanonical()) { + Canonical = getObjCQualifiedIdType(idType.getCanonicalType(), + Protocols, NumProtocols); + ObjCQualifiedIdType *NewQT = + ObjCQualifiedIdTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewQT == 0 && "Shouldn't be in the map!"); + } + + ObjCQualifiedIdType *QType = + new ObjCQualifiedIdType(Canonical, Protocols, NumProtocols); + Types.push_back(QType); + ObjCQualifiedIdTypes.InsertNode(QType, InsertPos); + return QualType(QType, 0); +} + +/// getTypeOfExpr - Unlike many "get<Type>" functions, we can't unique +/// TypeOfExpr AST's (since expression's are never shared). For example, +/// multiple declarations that refer to "typeof(x)" all contain different +/// DeclRefExpr's. This doesn't effect the type checker, since it operates +/// on canonical type's (which are always unique). 
+QualType ASTContext::getTypeOfExpr(Expr *tofExpr) { + QualType Canonical = tofExpr->getType().getCanonicalType(); + TypeOfExpr *toe = new TypeOfExpr(tofExpr, Canonical); + Types.push_back(toe); + return QualType(toe, 0); +} + +/// getTypeOfType - Unlike many "get<Type>" functions, we don't unique +/// TypeOfType AST's. The only motivation to unique these nodes would be +/// memory savings. Since typeof(t) is fairly uncommon, space shouldn't be +/// an issue. This doesn't effect the type checker, since it operates +/// on canonical type's (which are always unique). +QualType ASTContext::getTypeOfType(QualType tofType) { + QualType Canonical = tofType.getCanonicalType(); + TypeOfType *tot = new TypeOfType(tofType, Canonical); + Types.push_back(tot); + return QualType(tot, 0); +} + +/// getTagDeclType - Return the unique reference to the type for the +/// specified TagDecl (struct/union/class/enum) decl. +QualType ASTContext::getTagDeclType(TagDecl *Decl) { + assert (Decl); + + // The decl stores the type cache. + if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0); + + TagType* T = new TagType(Decl, QualType()); + Types.push_back(T); + Decl->TypeForDecl = T; + + return QualType(T, 0); +} + +/// getSizeType - Return the unique type for "size_t" (C99 7.17), the result +/// of the sizeof operator (C99 6.5.3.4p4). The value is target dependent and +/// needs to agree with the definition in <stddef.h>. +QualType ASTContext::getSizeType() const { + // On Darwin, size_t is defined as a "long unsigned int". + // FIXME: should derive from "Target". + return UnsignedLongTy; +} + +/// getWcharType - Return the unique type for "wchar_t" (C99 7.17), the +/// width of characters in wide strings, The value is target dependent and +/// needs to agree with the definition in <stddef.h>. +QualType ASTContext::getWcharType() const { + // On Darwin, wchar_t is defined as a "int". + // FIXME: should derive from "Target". 
+ return IntTy; +} + +/// getPointerDiffType - Return the unique type for "ptrdiff_t" (ref?) +/// defined in <stddef.h>. Pointer - pointer requires this (C99 6.5.6p9). +QualType ASTContext::getPointerDiffType() const { + // On Darwin, ptrdiff_t is defined as a "int". This seems like a bug... + // FIXME: should derive from "Target". + return IntTy; +} + +/// getIntegerRank - Return an integer conversion rank (C99 6.3.1.1p1). This +/// routine will assert if passed a built-in type that isn't an integer or enum. +static int getIntegerRank(QualType t) { + if (const TagType *TT = dyn_cast<TagType>(t.getCanonicalType())) { + assert(TT->getDecl()->getKind() == Decl::Enum && "not an int or enum"); + return 4; + } + + const BuiltinType *BT = t.getCanonicalType()->getAsBuiltinType(); + switch (BT->getKind()) { + default: + assert(0 && "getIntegerRank(): not a built-in integer"); + case BuiltinType::Bool: + return 1; + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + return 2; + case BuiltinType::Short: + case BuiltinType::UShort: + return 3; + case BuiltinType::Int: + case BuiltinType::UInt: + return 4; + case BuiltinType::Long: + case BuiltinType::ULong: + return 5; + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + return 6; + } +} + +/// getFloatingRank - Return a relative rank for floating point types. +/// This routine will assert if passed a built-in type that isn't a float. 
+static int getFloatingRank(QualType T) { + T = T.getCanonicalType(); + if (const ComplexType *CT = T->getAsComplexType()) + return getFloatingRank(CT->getElementType()); + + switch (T->getAsBuiltinType()->getKind()) { + default: assert(0 && "getFloatingRank(): not a floating type"); + case BuiltinType::Float: return FloatRank; + case BuiltinType::Double: return DoubleRank; + case BuiltinType::LongDouble: return LongDoubleRank; + } +} + +/// getFloatingTypeOfSizeWithinDomain - Returns a real floating +/// point or a complex type (based on typeDomain/typeSize). +/// 'typeDomain' is a real floating point or complex type. +/// 'typeSize' is a real floating point or complex type. +QualType ASTContext::getFloatingTypeOfSizeWithinDomain( + QualType typeSize, QualType typeDomain) const { + if (typeDomain->isComplexType()) { + switch (getFloatingRank(typeSize)) { + default: assert(0 && "getFloatingRank(): illegal value for rank"); + case FloatRank: return FloatComplexTy; + case DoubleRank: return DoubleComplexTy; + case LongDoubleRank: return LongDoubleComplexTy; + } + } + if (typeDomain->isRealFloatingType()) { + switch (getFloatingRank(typeSize)) { + default: assert(0 && "getFloatingRank(): illegal value for rank"); + case FloatRank: return FloatTy; + case DoubleRank: return DoubleTy; + case LongDoubleRank: return LongDoubleTy; + } + } + assert(0 && "getFloatingTypeOfSizeWithinDomain(): illegal domain"); + //an invalid return value, but the assert + //will ensure that this code is never reached. + return VoidTy; +} + +/// compareFloatingType - Handles 3 different combos: +/// float/float, float/complex, complex/complex. +/// If lt > rt, return 1. If lt == rt, return 0. If lt < rt, return -1. +int ASTContext::compareFloatingType(QualType lt, QualType rt) { + if (getFloatingRank(lt) == getFloatingRank(rt)) + return 0; + if (getFloatingRank(lt) > getFloatingRank(rt)) + return 1; + return -1; +} + +// maxIntegerType - Returns the highest ranked integer type. 
Handles 3 case: +// unsigned/unsigned, signed/signed, signed/unsigned. C99 6.3.1.8p1. +QualType ASTContext::maxIntegerType(QualType lhs, QualType rhs) { + if (lhs == rhs) return lhs; + + bool t1Unsigned = lhs->isUnsignedIntegerType(); + bool t2Unsigned = rhs->isUnsignedIntegerType(); + + if ((t1Unsigned && t2Unsigned) || (!t1Unsigned && !t2Unsigned)) + return getIntegerRank(lhs) >= getIntegerRank(rhs) ? lhs : rhs; + + // We have two integer types with differing signs + QualType unsignedType = t1Unsigned ? lhs : rhs; + QualType signedType = t1Unsigned ? rhs : lhs; + + if (getIntegerRank(unsignedType) >= getIntegerRank(signedType)) + return unsignedType; + else { + // FIXME: Need to check if the signed type can represent all values of the + // unsigned type. If it can, then the result is the signed type. + // If it can't, then the result is the unsigned version of the signed type. + // Should probably add a helper that returns a signed integer type from + // an unsigned (and vice versa). C99 6.3.1.8. + return signedType; + } +} + +// getCFConstantStringType - Return the type used for constant CFStrings. 
+QualType ASTContext::getCFConstantStringType() { + if (!CFConstantStringTypeDecl) { + CFConstantStringTypeDecl = + RecordDecl::Create(*this, Decl::Struct, SourceLocation(), + &Idents.get("NSConstantString"), 0); + QualType FieldTypes[4]; + + // const int *isa; + FieldTypes[0] = getPointerType(IntTy.getQualifiedType(QualType::Const)); + // int flags; + FieldTypes[1] = IntTy; + // const char *str; + FieldTypes[2] = getPointerType(CharTy.getQualifiedType(QualType::Const)); + // long length; + FieldTypes[3] = LongTy; + // Create fields + FieldDecl *FieldDecls[4]; + + for (unsigned i = 0; i < 4; ++i) + FieldDecls[i] = new FieldDecl(SourceLocation(), 0, FieldTypes[i]); + + CFConstantStringTypeDecl->defineBody(FieldDecls, 4); + } + + return getTagDeclType(CFConstantStringTypeDecl); +} + +// This returns true if a type has been typedefed to BOOL: +// typedef <type> BOOL; +static bool isTypeTypedefedAsBOOL(QualType T) { + if (const TypedefType *TT = dyn_cast<TypedefType>(T)) + return !strcmp(TT->getDecl()->getName(), "BOOL"); + + return false; +} + +/// getObjCEncodingTypeSize returns size of type for objective-c encoding +/// purpose. +int ASTContext::getObjCEncodingTypeSize(QualType type) { + uint64_t sz = getTypeSize(type); + + // Make all integer and enum types at least as large as an int + if (sz > 0 && type->isIntegralType()) + sz = std::max(sz, getTypeSize(IntTy)); + // Treat arrays as pointers, since that's how they're passed in. + else if (type->isArrayType()) + sz = getTypeSize(VoidPtrTy); + return sz / getTypeSize(CharTy); +} + +/// getObjCEncodingForMethodDecl - Return the encoded type for this method +/// declaration. +void ASTContext::getObjCEncodingForMethodDecl(ObjCMethodDecl *Decl, + std::string& S) +{ + // Encode type qualifer, 'in', 'inout', etc. for the return type. + getObjCEncodingForTypeQualifier(Decl->getObjCDeclQualifier(), S); + // Encode result type. 
+ getObjCEncodingForType(Decl->getResultType(), S, EncodingRecordTypes); + // Compute size of all parameters. + // Start with computing size of a pointer in number of bytes. + // FIXME: There might(should) be a better way of doing this computation! + SourceLocation Loc; + int PtrSize = getTypeSize(VoidPtrTy) / getTypeSize(CharTy); + // The first two arguments (self and _cmd) are pointers; account for + // their size. + int ParmOffset = 2 * PtrSize; + int NumOfParams = Decl->getNumParams(); + for (int i = 0; i < NumOfParams; i++) { + QualType PType = Decl->getParamDecl(i)->getType(); + int sz = getObjCEncodingTypeSize (PType); + assert (sz > 0 && "getObjCEncodingForMethodDecl - Incomplete param type"); + ParmOffset += sz; + } + S += llvm::utostr(ParmOffset); + S += "@0:"; + S += llvm::utostr(PtrSize); + + // Argument types. + ParmOffset = 2 * PtrSize; + for (int i = 0; i < NumOfParams; i++) { + QualType PType = Decl->getParamDecl(i)->getType(); + // Process argument qualifiers for user supplied arguments; such as, + // 'in', 'inout', etc. + getObjCEncodingForTypeQualifier( + Decl->getParamDecl(i)->getObjCDeclQualifier(), S); + getObjCEncodingForType(PType, S, EncodingRecordTypes); + S += llvm::utostr(ParmOffset); + ParmOffset += getObjCEncodingTypeSize(PType); + } +} + +void ASTContext::getObjCEncodingForType(QualType T, std::string& S, + llvm::SmallVector<const RecordType *, 8> &ERType) const +{ + // FIXME: This currently doesn't encode: + // @ An object (whether statically typed or typed id) + // # A class object (Class) + // : A method selector (SEL) + // {name=type...} A structure + // (name=type...) 
A union + // bnum A bit field of num bits + + if (const BuiltinType *BT = T->getAsBuiltinType()) { + char encoding; + switch (BT->getKind()) { + case BuiltinType::Void: + encoding = 'v'; + break; + case BuiltinType::Bool: + encoding = 'B'; + break; + case BuiltinType::Char_U: + case BuiltinType::UChar: + encoding = 'C'; + break; + case BuiltinType::UShort: + encoding = 'S'; + break; + case BuiltinType::UInt: + encoding = 'I'; + break; + case BuiltinType::ULong: + encoding = 'L'; + break; + case BuiltinType::ULongLong: + encoding = 'Q'; + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + encoding = 'c'; + break; + case BuiltinType::Short: + encoding = 's'; + break; + case BuiltinType::Int: + encoding = 'i'; + break; + case BuiltinType::Long: + encoding = 'l'; + break; + case BuiltinType::LongLong: + encoding = 'q'; + break; + case BuiltinType::Float: + encoding = 'f'; + break; + case BuiltinType::Double: + encoding = 'd'; + break; + case BuiltinType::LongDouble: + encoding = 'd'; + break; + default: + assert(0 && "Unhandled builtin type kind"); + } + + S += encoding; + } + else if (T->isObjCQualifiedIdType()) { + // Treat id<P...> same as 'id' for encoding purposes. + return getObjCEncodingForType(getObjCIdType(), S, ERType); + + } + else if (const PointerType *PT = T->getAsPointerType()) { + QualType PointeeTy = PT->getPointeeType(); + if (isObjCIdType(PointeeTy) || PointeeTy->isObjCInterfaceType()) { + S += '@'; + return; + } else if (isObjCClassType(PointeeTy)) { + S += '#'; + return; + } else if (isObjCSelType(PointeeTy)) { + S += ':'; + return; + } + + if (PointeeTy->isCharType()) { + // char pointer types should be encoded as '*' unless it is a + // type that has been typedef'd to 'BOOL'. 
+ if (!isTypeTypedefedAsBOOL(PointeeTy)) { + S += '*'; + return; + } + } + + S += '^'; + getObjCEncodingForType(PT->getPointeeType(), S, ERType); + } else if (const ArrayType *AT = T->getAsArrayType()) { + S += '['; + + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + S += llvm::utostr(CAT->getSize().getZExtValue()); + else + assert(0 && "Unhandled array type!"); + + getObjCEncodingForType(AT->getElementType(), S, ERType); + S += ']'; + } else if (T->getAsFunctionType()) { + S += '?'; + } else if (const RecordType *RTy = T->getAsRecordType()) { + RecordDecl *RDecl= RTy->getDecl(); + S += '{'; + S += RDecl->getName(); + bool found = false; + for (unsigned i = 0, e = ERType.size(); i != e; ++i) + if (ERType[i] == RTy) { + found = true; + break; + } + if (!found) { + ERType.push_back(RTy); + S += '='; + for (int i = 0; i < RDecl->getNumMembers(); i++) { + FieldDecl *field = RDecl->getMember(i); + getObjCEncodingForType(field->getType(), S, ERType); + } + assert(ERType.back() == RTy && "Record Type stack mismatch."); + ERType.pop_back(); + } + S += '}'; + } else if (T->isEnumeralType()) { + S += 'i'; + } else + assert(0 && "@encode for type not implemented!"); +} + +void ASTContext::getObjCEncodingForTypeQualifier(Decl::ObjCDeclQualifier QT, + std::string& S) const { + if (QT & Decl::OBJC_TQ_In) + S += 'n'; + if (QT & Decl::OBJC_TQ_Inout) + S += 'N'; + if (QT & Decl::OBJC_TQ_Out) + S += 'o'; + if (QT & Decl::OBJC_TQ_Bycopy) + S += 'O'; + if (QT & Decl::OBJC_TQ_Byref) + S += 'R'; + if (QT & Decl::OBJC_TQ_Oneway) + S += 'V'; +} + +void ASTContext::setBuiltinVaListType(QualType T) +{ + assert(BuiltinVaListType.isNull() && "__builtin_va_list type already set!"); + + BuiltinVaListType = T; +} + +void ASTContext::setObjCIdType(TypedefDecl *TD) +{ + assert(ObjCIdType.isNull() && "'id' type already set!"); + + ObjCIdType = getTypedefType(TD); + + // typedef struct objc_object *id; + const PointerType *ptr = TD->getUnderlyingType()->getAsPointerType(); + 
assert(ptr && "'id' incorrectly typed"); + const RecordType *rec = ptr->getPointeeType()->getAsStructureType(); + assert(rec && "'id' incorrectly typed"); + IdStructType = rec; +} + +void ASTContext::setObjCSelType(TypedefDecl *TD) +{ + assert(ObjCSelType.isNull() && "'SEL' type already set!"); + + ObjCSelType = getTypedefType(TD); + + // typedef struct objc_selector *SEL; + const PointerType *ptr = TD->getUnderlyingType()->getAsPointerType(); + assert(ptr && "'SEL' incorrectly typed"); + const RecordType *rec = ptr->getPointeeType()->getAsStructureType(); + assert(rec && "'SEL' incorrectly typed"); + SelStructType = rec; +} + +void ASTContext::setObjCProtoType(QualType QT) +{ + assert(ObjCProtoType.isNull() && "'Protocol' type already set!"); + ObjCProtoType = QT; +} + +void ASTContext::setObjCClassType(TypedefDecl *TD) +{ + assert(ObjCClassType.isNull() && "'Class' type already set!"); + + ObjCClassType = getTypedefType(TD); + + // typedef struct objc_class *Class; + const PointerType *ptr = TD->getUnderlyingType()->getAsPointerType(); + assert(ptr && "'Class' incorrectly typed"); + const RecordType *rec = ptr->getPointeeType()->getAsStructureType(); + assert(rec && "'Class' incorrectly typed"); + ClassStructType = rec; +} + +void ASTContext::setObjCConstantStringInterface(ObjCInterfaceDecl *Decl) { + assert(ObjCConstantStringType.isNull() && + "'NSConstantString' type already set!"); + + ObjCConstantStringType = getObjCInterfaceType(Decl); +} + +bool ASTContext::builtinTypesAreCompatible(QualType lhs, QualType rhs) { + const BuiltinType *lBuiltin = lhs->getAsBuiltinType(); + const BuiltinType *rBuiltin = rhs->getAsBuiltinType(); + + return lBuiltin->getKind() == rBuiltin->getKind(); +} + +/// objcTypesAreCompatible - This routine is called when two types +/// are of different class; one is interface type or is +/// a qualified interface type and the other type is of a different class. +/// Example, II or II<P>. 
+bool ASTContext::objcTypesAreCompatible(QualType lhs, QualType rhs) { + if (lhs->isObjCInterfaceType() && isObjCIdType(rhs)) + return true; + else if (isObjCIdType(lhs) && rhs->isObjCInterfaceType()) + return true; + if (ObjCInterfaceType *lhsIT = + dyn_cast<ObjCInterfaceType>(lhs.getCanonicalType().getTypePtr())) { + ObjCQualifiedInterfaceType *rhsQI = + dyn_cast<ObjCQualifiedInterfaceType>(rhs.getCanonicalType().getTypePtr()); + return rhsQI && (lhsIT->getDecl() == rhsQI->getDecl()); + } + else if (ObjCInterfaceType *rhsIT = + dyn_cast<ObjCInterfaceType>(rhs.getCanonicalType().getTypePtr())) { + ObjCQualifiedInterfaceType *lhsQI = + dyn_cast<ObjCQualifiedInterfaceType>(lhs.getCanonicalType().getTypePtr()); + return lhsQI && (rhsIT->getDecl() == lhsQI->getDecl()); + } + return false; +} + +/// Check that 'lhs' and 'rhs' are compatible interface types. Both types +/// must be canonical types. +bool ASTContext::interfaceTypesAreCompatible(QualType lhs, QualType rhs) { + assert (lhs->isCanonical() && + "interfaceTypesAreCompatible strip typedefs of lhs"); + assert (rhs->isCanonical() && + "interfaceTypesAreCompatible strip typedefs of rhs"); + if (lhs == rhs) + return true; + ObjCInterfaceType *lhsIT = cast<ObjCInterfaceType>(lhs.getTypePtr()); + ObjCInterfaceType *rhsIT = cast<ObjCInterfaceType>(rhs.getTypePtr()); + ObjCInterfaceDecl *rhsIDecl = rhsIT->getDecl(); + ObjCInterfaceDecl *lhsIDecl = lhsIT->getDecl(); + // rhs is derived from lhs it is OK; else it is not OK. 
+ while (rhsIDecl != NULL) { + if (rhsIDecl == lhsIDecl) + return true; + rhsIDecl = rhsIDecl->getSuperClass(); + } + return false; +} + +bool ASTContext::QualifiedInterfaceTypesAreCompatible(QualType lhs, + QualType rhs) { + ObjCQualifiedInterfaceType *lhsQI = + dyn_cast<ObjCQualifiedInterfaceType>(lhs.getCanonicalType().getTypePtr()); + assert(lhsQI && "QualifiedInterfaceTypesAreCompatible - bad lhs type"); + ObjCQualifiedInterfaceType *rhsQI = + dyn_cast<ObjCQualifiedInterfaceType>(rhs.getCanonicalType().getTypePtr()); + assert(rhsQI && "QualifiedInterfaceTypesAreCompatible - bad rhs type"); + if (!interfaceTypesAreCompatible( + getObjCInterfaceType(lhsQI->getDecl()).getCanonicalType(), + getObjCInterfaceType(rhsQI->getDecl()).getCanonicalType())) + return false; + /* All protocols in lhs must have a presense in rhs. */ + for (unsigned i =0; i < lhsQI->getNumProtocols(); i++) { + bool match = false; + ObjCProtocolDecl *lhsProto = lhsQI->getProtocols(i); + for (unsigned j = 0; j < rhsQI->getNumProtocols(); j++) { + ObjCProtocolDecl *rhsProto = rhsQI->getProtocols(j); + if (lhsProto == rhsProto) { + match = true; + break; + } + } + if (!match) + return false; + } + return true; +} + +/// ProtocolCompatibleWithProtocol - return 'true' if 'lProto' is in the +/// inheritance hierarchy of 'rProto'. +static bool ProtocolCompatibleWithProtocol(ObjCProtocolDecl *lProto, + ObjCProtocolDecl *rProto) { + if (lProto == rProto) + return true; + ObjCProtocolDecl** RefPDecl = rProto->getReferencedProtocols(); + for (unsigned i = 0; i < rProto->getNumReferencedProtocols(); i++) + if (ProtocolCompatibleWithProtocol(lProto, RefPDecl[i])) + return true; + return false; +} + +/// ClassImplementsProtocol - Checks that 'lProto' protocol +/// has been implemented in IDecl class, its super class or categories (if +/// lookupCategory is true). +static bool ClassImplementsProtocol(ObjCProtocolDecl *lProto, + ObjCInterfaceDecl *IDecl, + bool lookupCategory) { + + // 1st, look up the class. 
+ ObjCProtocolDecl **protoList = IDecl->getReferencedProtocols(); + for (unsigned i = 0; i < IDecl->getNumIntfRefProtocols(); i++) { + if (ProtocolCompatibleWithProtocol(lProto, protoList[i])) + return true; + } + + // 2nd, look up the category. + if (lookupCategory) + for (ObjCCategoryDecl *CDecl = IDecl->getCategoryList(); CDecl; + CDecl = CDecl->getNextClassCategory()) { + protoList = CDecl->getReferencedProtocols(); + for (unsigned i = 0; i < CDecl->getNumReferencedProtocols(); i++) { + if (ProtocolCompatibleWithProtocol(lProto, protoList[i])) + return true; + } + } + + // 3rd, look up the super class(s) + if (IDecl->getSuperClass()) + return + ClassImplementsProtocol(lProto, IDecl->getSuperClass(), lookupCategory); + + return false; +} + +/// ObjCQualifiedIdTypesAreCompatible - Compares two types, at least +/// one of which is a protocol qualified 'id' type. When 'compare' +/// is true it is for comparison; when false, for assignment/initialization. +bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, + QualType rhs, + bool compare) { + // match id<P..> with an 'id' type in all cases. 
+ if (const PointerType *PT = lhs->getAsPointerType()) { + QualType PointeeTy = PT->getPointeeType(); + if (isObjCIdType(PointeeTy) || PointeeTy->isVoidType()) + return true; + + } + else if (const PointerType *PT = rhs->getAsPointerType()) { + QualType PointeeTy = PT->getPointeeType(); + if (isObjCIdType(PointeeTy) || PointeeTy->isVoidType()) + return true; + + } + + ObjCQualifiedInterfaceType *lhsQI = 0; + ObjCQualifiedInterfaceType *rhsQI = 0; + ObjCInterfaceDecl *lhsID = 0; + ObjCInterfaceDecl *rhsID = 0; + ObjCQualifiedIdType *lhsQID = dyn_cast<ObjCQualifiedIdType>(lhs); + ObjCQualifiedIdType *rhsQID = dyn_cast<ObjCQualifiedIdType>(rhs); + + if (lhsQID) { + if (!rhsQID && rhs->getTypeClass() == Type::Pointer) { + QualType rtype = + cast<PointerType>(rhs.getCanonicalType())->getPointeeType(); + rhsQI = + dyn_cast<ObjCQualifiedInterfaceType>( + rtype.getCanonicalType().getTypePtr()); + if (!rhsQI) { + ObjCInterfaceType *IT = dyn_cast<ObjCInterfaceType>( + rtype.getCanonicalType().getTypePtr()); + if (IT) + rhsID = IT->getDecl(); + } + } + if (!rhsQI && !rhsQID && !rhsID) + return false; + + unsigned numRhsProtocols = 0; + ObjCProtocolDecl **rhsProtoList = 0; + if (rhsQI) { + numRhsProtocols = rhsQI->getNumProtocols(); + rhsProtoList = rhsQI->getReferencedProtocols(); + } + else if (rhsQID) { + numRhsProtocols = rhsQID->getNumProtocols(); + rhsProtoList = rhsQID->getReferencedProtocols(); + } + + for (unsigned i =0; i < lhsQID->getNumProtocols(); i++) { + ObjCProtocolDecl *lhsProto = lhsQID->getProtocols(i); + bool match = false; + + // when comparing an id<P> on lhs with a static type on rhs, + // see if static class implements all of id's protocols, directly or + // through its super class and categories. 
+ if (rhsID) { + if (ClassImplementsProtocol(lhsProto, rhsID, true)) + match = true; + } + else for (unsigned j = 0; j < numRhsProtocols; j++) { + ObjCProtocolDecl *rhsProto = rhsProtoList[j]; + if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) || + compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto)) { + match = true; + break; + } + } + if (!match) + return false; + } + } + else if (rhsQID) { + if (!lhsQID && lhs->getTypeClass() == Type::Pointer) { + QualType ltype = + cast<PointerType>(lhs.getCanonicalType())->getPointeeType(); + lhsQI = + dyn_cast<ObjCQualifiedInterfaceType>( + ltype.getCanonicalType().getTypePtr()); + if (!lhsQI) { + ObjCInterfaceType *IT = dyn_cast<ObjCInterfaceType>( + ltype.getCanonicalType().getTypePtr()); + if (IT) + lhsID = IT->getDecl(); + } + } + if (!lhsQI && !lhsQID && !lhsID) + return false; + + unsigned numLhsProtocols = 0; + ObjCProtocolDecl **lhsProtoList = 0; + if (lhsQI) { + numLhsProtocols = lhsQI->getNumProtocols(); + lhsProtoList = lhsQI->getReferencedProtocols(); + } + else if (lhsQID) { + numLhsProtocols = lhsQID->getNumProtocols(); + lhsProtoList = lhsQID->getReferencedProtocols(); + } + bool match = false; + // for static type vs. qualified 'id' type, check that class implements + // one of 'id's protocols. 
+ if (lhsID) { + for (unsigned j = 0; j < rhsQID->getNumProtocols(); j++) { + ObjCProtocolDecl *rhsProto = rhsQID->getProtocols(j); + if (ClassImplementsProtocol(rhsProto, lhsID, compare)) { + match = true; + break; + } + } + } + else for (unsigned i =0; i < numLhsProtocols; i++) { + match = false; + ObjCProtocolDecl *lhsProto = lhsProtoList[i]; + for (unsigned j = 0; j < rhsQID->getNumProtocols(); j++) { + ObjCProtocolDecl *rhsProto = rhsQID->getProtocols(j); + if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) || + compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto)) { + match = true; + break; + } + } + } + if (!match) + return false; + } + return true; +} + +bool ASTContext::vectorTypesAreCompatible(QualType lhs, QualType rhs) { + const VectorType *lVector = lhs->getAsVectorType(); + const VectorType *rVector = rhs->getAsVectorType(); + + if ((lVector->getElementType().getCanonicalType() == + rVector->getElementType().getCanonicalType()) && + (lVector->getNumElements() == rVector->getNumElements())) + return true; + return false; +} + +// C99 6.2.7p1: If both are complete types, then the following additional +// requirements apply...FIXME (handle compatibility across source files). +bool ASTContext::tagTypesAreCompatible(QualType lhs, QualType rhs) { + // "Class" and "id" are compatible built-in structure types. + if (isObjCIdType(lhs) && isObjCClassType(rhs) || + isObjCClassType(lhs) && isObjCIdType(rhs)) + return true; + + // Within a translation unit a tag type is + // only compatible with itself. + return lhs.getCanonicalType() == rhs.getCanonicalType(); +} + +bool ASTContext::pointerTypesAreCompatible(QualType lhs, QualType rhs) { + // C99 6.7.5.1p2: For two pointer types to be compatible, both shall be + // identically qualified and both shall be pointers to compatible types. 
+ if (lhs.getCVRQualifiers() != rhs.getCVRQualifiers() || + lhs.getAddressSpace() != rhs.getAddressSpace()) + return false; + + QualType ltype = cast<PointerType>(lhs.getCanonicalType())->getPointeeType(); + QualType rtype = cast<PointerType>(rhs.getCanonicalType())->getPointeeType(); + + return typesAreCompatible(ltype, rtype); +} + +// C++ 5.17p6: When the left operand of an assignment operator denotes a +// reference to T, the operation assigns to the object of type T denoted by the +// reference. +bool ASTContext::referenceTypesAreCompatible(QualType lhs, QualType rhs) { + QualType ltype = lhs; + + if (lhs->isReferenceType()) + ltype = cast<ReferenceType>(lhs.getCanonicalType())->getReferenceeType(); + + QualType rtype = rhs; + + if (rhs->isReferenceType()) + rtype = cast<ReferenceType>(rhs.getCanonicalType())->getReferenceeType(); + + return typesAreCompatible(ltype, rtype); +} + +bool ASTContext::functionTypesAreCompatible(QualType lhs, QualType rhs) { + const FunctionType *lbase = cast<FunctionType>(lhs.getCanonicalType()); + const FunctionType *rbase = cast<FunctionType>(rhs.getCanonicalType()); + const FunctionTypeProto *lproto = dyn_cast<FunctionTypeProto>(lbase); + const FunctionTypeProto *rproto = dyn_cast<FunctionTypeProto>(rbase); + + // first check the return types (common between C99 and K&R). + if (!typesAreCompatible(lbase->getResultType(), rbase->getResultType())) + return false; + + if (lproto && rproto) { // two C99 style function prototypes + unsigned lproto_nargs = lproto->getNumArgs(); + unsigned rproto_nargs = rproto->getNumArgs(); + + if (lproto_nargs != rproto_nargs) + return false; + + // both prototypes have the same number of arguments. + if ((lproto->isVariadic() && !rproto->isVariadic()) || + (rproto->isVariadic() && !lproto->isVariadic())) + return false; + + // The use of ellipsis agree...now check the argument types. 
+ for (unsigned i = 0; i < lproto_nargs; i++) + // C99 6.7.5.3p15: ...and each parameter declared with qualified type + // is taken as having the unqualified version of it's declared type. + if (!typesAreCompatible(lproto->getArgType(i).getUnqualifiedType(), + rproto->getArgType(i).getUnqualifiedType())) + return false; + return true; + } + if (!lproto && !rproto) // two K&R style function decls, nothing to do. + return true; + + // we have a mixture of K&R style with C99 prototypes + const FunctionTypeProto *proto = lproto ? lproto : rproto; + + if (proto->isVariadic()) + return false; + + // FIXME: Each parameter type T in the prototype must be compatible with the + // type resulting from applying the usual argument conversions to T. + return true; +} + +bool ASTContext::arrayTypesAreCompatible(QualType lhs, QualType rhs) { + // Compatible arrays must have compatible element types + QualType ltype = lhs->getAsArrayType()->getElementType(); + QualType rtype = rhs->getAsArrayType()->getElementType(); + + if (!typesAreCompatible(ltype, rtype)) + return false; + + // Compatible arrays must be the same size + if (const ConstantArrayType* LCAT = lhs->getAsConstantArrayType()) + if (const ConstantArrayType* RCAT = rhs->getAsConstantArrayType()) + return RCAT->getSize() == LCAT->getSize(); + + return true; +} + +/// typesAreCompatible - C99 6.7.3p9: For two qualified types to be compatible, +/// both shall have the identically qualified version of a compatible type. +/// C99 6.2.7p1: Two types have compatible types if their types are the +/// same. See 6.7.[2,3,5] for additional rules. 
+bool ASTContext::typesAreCompatible(QualType lhs, QualType rhs) { + if (lhs.getCVRQualifiers() != rhs.getCVRQualifiers() || + lhs.getAddressSpace() != rhs.getAddressSpace()) + return false; + + QualType lcanon = lhs.getCanonicalType(); + QualType rcanon = rhs.getCanonicalType(); + + // If two types are identical, they are are compatible + if (lcanon == rcanon) + return true; + + // C++ [expr]: If an expression initially has the type "reference to T", the + // type is adjusted to "T" prior to any further analysis, the expression + // designates the object or function denoted by the reference, and the + // expression is an lvalue. + if (ReferenceType *RT = dyn_cast<ReferenceType>(lcanon)) + lcanon = RT->getReferenceeType(); + if (ReferenceType *RT = dyn_cast<ReferenceType>(rcanon)) + rcanon = RT->getReferenceeType(); + + Type::TypeClass LHSClass = lcanon->getTypeClass(); + Type::TypeClass RHSClass = rcanon->getTypeClass(); + + // We want to consider the two function types to be the same for these + // comparisons, just force one to the other. + if (LHSClass == Type::FunctionProto) LHSClass = Type::FunctionNoProto; + if (RHSClass == Type::FunctionProto) RHSClass = Type::FunctionNoProto; + + // Same as above for arrays + if (LHSClass == Type::VariableArray) LHSClass = Type::ConstantArray; + if (RHSClass == Type::VariableArray) RHSClass = Type::ConstantArray; + if (LHSClass == Type::IncompleteArray) LHSClass = Type::ConstantArray; + if (RHSClass == Type::IncompleteArray) RHSClass = Type::ConstantArray; + + // If the canonical type classes don't match... + if (LHSClass != RHSClass) { + // For Objective-C, it is possible for two types to be compatible + // when their classes don't match (when dealing with "id"). If either type + // is an interface, we defer to objcTypesAreCompatible(). 
+ if (lcanon->isObjCInterfaceType() || rcanon->isObjCInterfaceType()) + return objcTypesAreCompatible(lcanon, rcanon); + + // C99 6.7.2.2p4: Each enumerated type shall be compatible with char, + // a signed integer type, or an unsigned integer type. + if (lcanon->isEnumeralType() && rcanon->isIntegralType()) { + EnumDecl* EDecl = cast<EnumDecl>(cast<TagType>(lcanon)->getDecl()); + return EDecl->getIntegerType() == rcanon; + } + if (rcanon->isEnumeralType() && lcanon->isIntegralType()) { + EnumDecl* EDecl = cast<EnumDecl>(cast<TagType>(rcanon)->getDecl()); + return EDecl->getIntegerType() == lcanon; + } + + return false; + } + // The canonical type classes match. + switch (LHSClass) { + case Type::FunctionProto: assert(0 && "Canonicalized away above"); + case Type::Pointer: + return pointerTypesAreCompatible(lcanon, rcanon); + case Type::ConstantArray: + case Type::VariableArray: + case Type::IncompleteArray: + return arrayTypesAreCompatible(lcanon, rcanon); + case Type::FunctionNoProto: + return functionTypesAreCompatible(lcanon, rcanon); + case Type::Tagged: // handle structures, unions + return tagTypesAreCompatible(lcanon, rcanon); + case Type::Builtin: + return builtinTypesAreCompatible(lcanon, rcanon); + case Type::ObjCInterface: + return interfaceTypesAreCompatible(lcanon, rcanon); + case Type::Vector: + case Type::OCUVector: + return vectorTypesAreCompatible(lcanon, rcanon); + case Type::ObjCQualifiedInterface: + return QualifiedInterfaceTypesAreCompatible(lcanon, rcanon); + default: + assert(0 && "unexpected type"); + } + return true; // should never get here... +} + +/// Emit - Serialize an ASTContext object to Bitcode. +void ASTContext::Emit(llvm::Serializer& S) const { + S.EmitRef(SourceMgr); + S.EmitRef(Target); + S.EmitRef(Idents); + S.EmitRef(Selectors); + + // Emit the size of the type vector so that we can reserve that size + // when we reconstitute the ASTContext object. 
+ S.EmitInt(Types.size()); + + for (std::vector<Type*>::const_iterator I=Types.begin(), E=Types.end(); + I!=E;++I) + (*I)->Emit(S); + + // FIXME: S.EmitOwnedPtr(CFConstantStringTypeDecl); +} + +ASTContext* ASTContext::Create(llvm::Deserializer& D) { + SourceManager &SM = D.ReadRef<SourceManager>(); + TargetInfo &t = D.ReadRef<TargetInfo>(); + IdentifierTable &idents = D.ReadRef<IdentifierTable>(); + SelectorTable &sels = D.ReadRef<SelectorTable>(); + + unsigned size_reserve = D.ReadInt(); + + ASTContext* A = new ASTContext(SM,t,idents,sels,size_reserve); + + for (unsigned i = 0; i < size_reserve; ++i) + Type::Create(*A,i,D); + + // FIXME: A->CFConstantStringTypeDecl = D.ReadOwnedPtr<RecordDecl>(); + + return A; +} diff --git a/clang/lib/AST/Builtins.cpp b/clang/lib/AST/Builtins.cpp new file mode 100644 index 00000000000..e2bf5ca007b --- /dev/null +++ b/clang/lib/AST/Builtins.cpp @@ -0,0 +1,195 @@ +//===--- Builtins.cpp - Builtin function implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements various things for builtin functions. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Builtins.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/TargetInfo.h" +using namespace clang; + +static const Builtin::Info BuiltinInfo[] = { + { "not a builtin function", 0, 0 }, +#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS }, +#include "clang/AST/Builtins.def" +}; + +const Builtin::Info &Builtin::Context::GetRecord(unsigned ID) const { + if (ID < Builtin::FirstTSBuiltin) + return BuiltinInfo[ID]; + assert(ID - Builtin::FirstTSBuiltin < NumTSRecords && "Invalid builtin ID!"); + return TSRecords[ID - Builtin::FirstTSBuiltin]; +} + + +/// InitializeBuiltins - Mark the identifiers for all the builtins with their +/// appropriate builtin ID # and mark any non-portable builtin identifiers as +/// such. +void Builtin::Context::InitializeBuiltins(IdentifierTable &Table, + const TargetInfo &Target) { + // Step #1: mark all target-independent builtins with their ID's. + for (unsigned i = Builtin::NotBuiltin+1; i != Builtin::FirstTSBuiltin; ++i) + Table.get(BuiltinInfo[i].Name).setBuiltinID(i); + + // Step #2: Get target builtins. + Target.getTargetBuiltins(TSRecords, NumTSRecords); + + // Step #3: Register target-specific builtins. + for (unsigned i = 0, e = NumTSRecords; i != e; ++i) + Table.get(TSRecords[i].Name).setBuiltinID(i+Builtin::FirstTSBuiltin); +} + +/// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing the +/// pointer over the consumed characters. This returns the resultant type. +/// +/// A descriptor is read in three parts: +/// 1. optional prefix modifiers: 'S' (signed), 'U' (unsigned), 'L' (long; +/// a second 'L' means long long); +/// 2. one base-type letter: 'v', 'f', 'd', 's', 'i', 'c', 'z' (size type), +/// 'F' (CF constant string type), 'a' (builtin va_list type), or 'V' +/// followed by a decimal element count and an element descriptor; +/// 3. 
only when AllowTypeModifiers is true, any number of suffixes: '*' +/// (pointer), '&' (reference), 'C' (const-qualify). +/// Malformed descriptors are caught by assertions only. +static QualType DecodeTypeFromStr(const char *&Str, ASTContext &Context, + bool AllowTypeModifiers = true) { + // Modifiers. + bool Long = false, LongLong = false, Signed = false, Unsigned = false; + + // Read the modifiers first. 
+ bool Done = false; + while (!Done) { + switch (*Str++) { + default: Done = true; --Str; break; + case 'S': + assert(!Unsigned && "Can't use both 'S' and 'U' modifiers!"); + assert(!Signed && "Can't use 'S' modifier multiple times!"); + Signed = true; + break; + case 'U': + assert(!Signed && "Can't use both 'S' and 'U' modifiers!"); + assert(!Unsigned && "Can't use 'U' modifier multiple times!"); + Unsigned = true; + break; + case 'L': + assert(!LongLong && "Can't have LLL modifier"); + if (Long) + LongLong = true; + else + Long = true; + break; + } + } + + QualType Type; + + // Read the base type. + switch (*Str++) { + default: assert(0 && "Unknown builtin type letter!"); + case 'v': + assert(!Long && !Signed && !Unsigned && "Bad modifiers used with 'v'!"); + Type = Context.VoidTy; + break; + case 'f': + assert(!Long && !Signed && !Unsigned && "Bad modifiers used with 'f'!"); + Type = Context.FloatTy; + break; + case 'd': + assert(!LongLong && !Signed && !Unsigned && "Bad modifiers used with 'd'!"); + if (Long) + Type = Context.LongDoubleTy; + else + Type = Context.DoubleTy; + break; + case 's': + assert(!LongLong && "Bad modifiers used with 's'!"); + if (Unsigned) + Type = Context.UnsignedShortTy; + else + Type = Context.ShortTy; + break; + case 'i': + if (LongLong) + Type = Unsigned ? Context.UnsignedLongLongTy : Context.LongLongTy; + else if (Long) + Type = Unsigned ? Context.UnsignedLongTy : Context.LongTy; + else if (Unsigned) + Type = Context.UnsignedIntTy; + else + Type = Context.IntTy; // default is signed. + break; + case 'c': + assert(!Long && !LongLong && "Bad modifiers used with 'c'!"); + if (Signed) + Type = Context.SignedCharTy; + else if (Unsigned) + Type = Context.UnsignedCharTy; + else + Type = Context.CharTy; + break; + case 'z': // size_t. 
+ assert(!Long && !Signed && !Unsigned && "Bad modifiers for 'z'!"); + Type = Context.getSizeType(); + break; + case 'F': + Type = Context.getCFConstantStringType(); + break; + case 'a': + Type = Context.getBuiltinVaListType(); + assert(!Type.isNull() && "builtin va list type not initialized!"); + break; + case 'V': { + char *End; + + unsigned NumElements = strtoul(Str, &End, 10); + assert(End != Str && "Missing vector size"); + + Str = End; + + QualType ElementType = DecodeTypeFromStr(Str, Context, false); + Type = Context.getVectorType(ElementType, NumElements); + break; + } + } + + if (!AllowTypeModifiers) + return Type; + + Done = false; + while (!Done) { + switch (*Str++) { + default: Done = true; --Str; break; + case '*': + Type = Context.getPointerType(Type); + break; + case '&': + Type = Context.getReferenceType(Type); + break; + case 'C': + Type = Type.getQualifiedType(QualType::Const); + break; + } + } + + return Type; +} + +/// GetBuiltinType - Return the type for the specified builtin. +QualType Builtin::Context::GetBuiltinType(unsigned id, + ASTContext &Context) const { + const char *TypeStr = GetRecord(id).Type; + + llvm::SmallVector<QualType, 8> ArgTypes; + + QualType ResType = DecodeTypeFromStr(TypeStr, Context); + while (TypeStr[0] && TypeStr[0] != '.') + ArgTypes.push_back(DecodeTypeFromStr(TypeStr, Context)); + + assert((TypeStr[0] != '.' || TypeStr[1] == 0) && + "'.' should only occur at end of builtin type list!"); + + // handle untyped/variadic arguments "T c99Style();" or "T cppStyle(...);". 
+ if (ArgTypes.size() == 0 && TypeStr[0] == '.') + return Context.getFunctionTypeNoProto(ResType); + return Context.getFunctionType(ResType, &ArgTypes[0], ArgTypes.size(), + TypeStr[0] == '.'); +} diff --git a/clang/lib/AST/CFG.cpp b/clang/lib/AST/CFG.cpp new file mode 100644 index 00000000000..e2aba6b3ff3 --- /dev/null +++ b/clang/lib/AST/CFG.cpp @@ -0,0 +1,1509 @@ +//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFG and CFGBuilder classes for representing and +// building Control-Flow Graphs (CFGs) from ASTs. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CFG.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/PrettyPrinter.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/Compiler.h" +#include <set> +#include <iomanip> +#include <algorithm> +#include <sstream> +#include <iostream> + +using namespace clang; + +namespace { + +// SaveAndRestore - A utility class that uses RAII to save and restore +// the value of a variable. +template<typename T> +struct VISIBILITY_HIDDEN SaveAndRestore { + SaveAndRestore(T& x) : X(x), old_value(x) {} + ~SaveAndRestore() { X = old_value; } + T get() { return old_value; } + + T& X; + T old_value; +}; + +/// CFGBuilder - This class implements CFG construction from an AST. +/// The builder is stateful: an instance of the builder should be used to only +/// construct a single CFG. 
+/// +/// Example usage: +/// +/// CFGBuilder builder; +/// CFG* cfg = builder.buildCFG(stmt1); +/// +/// CFG construction is done via a recursive walk of an AST. +/// We actually parse the AST in reverse order so that the successor +/// of a basic block is constructed prior to its predecessor. This +/// allows us to nicely capture implicit fall-throughs without extra +/// basic blocks. +/// +class VISIBILITY_HIDDEN CFGBuilder : public StmtVisitor<CFGBuilder,CFGBlock*> { + CFG* cfg; + CFGBlock* Block; + CFGBlock* Succ; + CFGBlock* ContinueTargetBlock; + CFGBlock* BreakTargetBlock; + CFGBlock* SwitchTerminatedBlock; + CFGBlock* DefaultCaseBlock; + + // LabelMap records the mapping from Label expressions to their blocks. + typedef llvm::DenseMap<LabelStmt*,CFGBlock*> LabelMapTy; + LabelMapTy LabelMap; + + // A list of blocks that end with a "goto" that must be backpatched to + // their resolved targets upon completion of CFG construction. + typedef std::vector<CFGBlock*> BackpatchBlocksTy; + BackpatchBlocksTy BackpatchBlocks; + + // A list of labels whose address has been taken (for indirect gotos). + typedef llvm::SmallPtrSet<LabelStmt*,5> LabelSetTy; + LabelSetTy AddressTakenLabels; + +public: + explicit CFGBuilder() : cfg(NULL), Block(NULL), Succ(NULL), + ContinueTargetBlock(NULL), BreakTargetBlock(NULL), + SwitchTerminatedBlock(NULL), DefaultCaseBlock(NULL) { + // Create an empty CFG. + cfg = new CFG(); + } + + ~CFGBuilder() { delete cfg; } + + // buildCFG - Used by external clients to construct the CFG. + CFG* buildCFG(Stmt* Statement); + + // Visitors to walk an AST and construct the CFG. Called by + // buildCFG. Do not call directly! 
+ + CFGBlock* VisitStmt(Stmt* Statement); + CFGBlock* VisitNullStmt(NullStmt* Statement); + CFGBlock* VisitCompoundStmt(CompoundStmt* C); + CFGBlock* VisitIfStmt(IfStmt* I); + CFGBlock* VisitReturnStmt(ReturnStmt* R); + CFGBlock* VisitLabelStmt(LabelStmt* L); + CFGBlock* VisitGotoStmt(GotoStmt* G); + CFGBlock* VisitForStmt(ForStmt* F); + CFGBlock* VisitWhileStmt(WhileStmt* W); + CFGBlock* VisitDoStmt(DoStmt* D); + CFGBlock* VisitContinueStmt(ContinueStmt* C); + CFGBlock* VisitBreakStmt(BreakStmt* B); + CFGBlock* VisitSwitchStmt(SwitchStmt* S); + CFGBlock* VisitCaseStmt(CaseStmt* S); + CFGBlock* VisitDefaultStmt(DefaultStmt* D); + CFGBlock* VisitIndirectGotoStmt(IndirectGotoStmt* I); + + // FIXME: Add support for ObjC-specific control-flow structures. + + CFGBlock* VisitObjCForCollectionStmt(ObjCForCollectionStmt* S) { + badCFG = true; + return Block; + } + + CFGBlock* VisitObjCAtTryStmt(ObjCAtTryStmt* S) { + badCFG = true; + return Block; + } + +private: + CFGBlock* createBlock(bool add_successor = true); + CFGBlock* addStmt(Stmt* S); + CFGBlock* WalkAST(Stmt* S, bool AlwaysAddStmt); + CFGBlock* WalkAST_VisitChildren(Stmt* S); + CFGBlock* WalkAST_VisitDeclSubExprs(StmtIterator& I); + CFGBlock* WalkAST_VisitStmtExpr(StmtExpr* S); + void FinishBlock(CFGBlock* B); + + bool badCFG; +}; + +/// BuildCFG - Constructs a CFG from an AST (a Stmt*). The AST can +/// represent an arbitrary statement. Examples include a single expression +/// or a function body (compound statement). The ownership of the returned +/// CFG is transferred to the caller. If CFG construction fails, this method +/// returns NULL. +CFG* CFGBuilder::buildCFG(Stmt* Statement) { + assert (cfg); + if (!Statement) return NULL; + + badCFG = false; + + // Create an empty block that will serve as the exit block for the CFG. + // Since this is the first block added to the CFG, it will be implicitly + // registered as the exit block. 
+ Succ = createBlock(); + assert (Succ == &cfg->getExit()); + Block = NULL; // the EXIT block is empty. Create all other blocks lazily. + + // Visit the statements and create the CFG. + CFGBlock* B = Visit(Statement); + if (!B) B = Succ; + + if (B) { + // Finalize the last constructed block. This usually involves + // reversing the order of the statements in the block. + if (Block) FinishBlock(B); + + // Backpatch the gotos whose label -> block mappings we didn't know + // when we encountered them. + for (BackpatchBlocksTy::iterator I = BackpatchBlocks.begin(), + E = BackpatchBlocks.end(); I != E; ++I ) { + + CFGBlock* B = *I; + GotoStmt* G = cast<GotoStmt>(B->getTerminator()); + LabelMapTy::iterator LI = LabelMap.find(G->getLabel()); + + // If there is no target for the goto, then we are looking at an + // incomplete AST. Handle this by not registering a successor. + if (LI == LabelMap.end()) continue; + + B->addSuccessor(LI->second); + } + + // Add successors to the Indirect Goto Dispatch block (if we have one). + if (CFGBlock* B = cfg->getIndirectGotoBlock()) + for (LabelSetTy::iterator I = AddressTakenLabels.begin(), + E = AddressTakenLabels.end(); I != E; ++I ) { + + // Lookup the target block. + LabelMapTy::iterator LI = LabelMap.find(*I); + + // If there is no target block that contains label, then we are looking + // at an incomplete AST. Handle this by not registering a successor. + if (LI == LabelMap.end()) continue; + + B->addSuccessor(LI->second); + } + + Succ = B; + } + + // Create an empty entry block that has no predecessors. + cfg->setEntry(createBlock()); + + if (badCFG) { + delete cfg; + cfg = NULL; + return NULL; + } + + // NULL out cfg so that repeated calls to the builder will fail and that + // the ownership of the constructed CFG is passed to the caller. + CFG* t = cfg; + cfg = NULL; + return t; +} + +/// createBlock - Used to lazily create blocks that are connected +/// to the current (global) succcessor. 
+CFGBlock* CFGBuilder::createBlock(bool add_successor) { + CFGBlock* B = cfg->createBlock(); + if (add_successor && Succ) B->addSuccessor(Succ); + return B; +} + +/// FinishBlock - When the last statement has been added to the block, +/// we must reverse the statements because they have been inserted +/// in reverse order. +void CFGBuilder::FinishBlock(CFGBlock* B) { + assert (B); + B->reverseStmts(); +} + +/// addStmt - Used to add statements/expressions to the current CFGBlock +/// "Block". This method calls WalkAST on the passed statement to see if it +/// contains any short-circuit expressions. If so, it recursively creates +/// the necessary blocks for such expressions. It returns the "topmost" block +/// of the created blocks, or the original value of "Block" when this method +/// was called if no additional blocks are created. +CFGBlock* CFGBuilder::addStmt(Stmt* S) { + if (!Block) Block = createBlock(); + return WalkAST(S,true); +} + +/// WalkAST - Used by addStmt to walk the subtree of a statement and +/// add extra blocks for ternary operators, &&, and ||. We also +/// process "," and DeclStmts (which may contain nested control-flow). +CFGBlock* CFGBuilder::WalkAST(Stmt* S, bool AlwaysAddStmt = false) { + switch (S->getStmtClass()) { + case Stmt::ConditionalOperatorClass: { + ConditionalOperator* C = cast<ConditionalOperator>(S); + + // Create the confluence block that will "merge" the results + // of the ternary expression. + CFGBlock* ConfluenceBlock = (Block) ? Block : createBlock(); + ConfluenceBlock->appendStmt(C); + FinishBlock(ConfluenceBlock); + + // Create a block for the LHS expression if there is an LHS expression. + // A GCC extension allows LHS to be NULL, causing the condition to + // be the value that is returned instead. + // e.g: x ?: y is shorthand for: x ? 
x : y; + Succ = ConfluenceBlock; + Block = NULL; + CFGBlock* LHSBlock = NULL; + if (C->getLHS()) { + LHSBlock = Visit(C->getLHS()); + FinishBlock(LHSBlock); + Block = NULL; + } + + // Create the block for the RHS expression. + Succ = ConfluenceBlock; + CFGBlock* RHSBlock = Visit(C->getRHS()); + FinishBlock(RHSBlock); + + // Create the block that will contain the condition. + Block = createBlock(false); + + if (LHSBlock) + Block->addSuccessor(LHSBlock); + else { + // If we have no LHS expression, add the ConfluenceBlock as a direct + // successor for the block containing the condition. Moreover, + // we need to reverse the order of the predecessors in the + // ConfluenceBlock because the RHSBlock will have been added to + // the succcessors already, and we want the first predecessor to the + // the block containing the expression for the case when the ternary + // expression evaluates to true. + Block->addSuccessor(ConfluenceBlock); + assert (ConfluenceBlock->pred_size() == 2); + std::reverse(ConfluenceBlock->pred_begin(), + ConfluenceBlock->pred_end()); + } + + Block->addSuccessor(RHSBlock); + + Block->setTerminator(C); + return addStmt(C->getCond()); + } + + case Stmt::ChooseExprClass: { + ChooseExpr* C = cast<ChooseExpr>(S); + + CFGBlock* ConfluenceBlock = (Block) ? 
Block : createBlock(); + ConfluenceBlock->appendStmt(C); + FinishBlock(ConfluenceBlock); + + Succ = ConfluenceBlock; + Block = NULL; + CFGBlock* LHSBlock = Visit(C->getLHS()); + FinishBlock(LHSBlock); + + Succ = ConfluenceBlock; + Block = NULL; + CFGBlock* RHSBlock = Visit(C->getRHS()); + FinishBlock(RHSBlock); + + Block = createBlock(false); + Block->addSuccessor(LHSBlock); + Block->addSuccessor(RHSBlock); + Block->setTerminator(C); + return addStmt(C->getCond()); + } + + case Stmt::DeclStmtClass: { + ScopedDecl* D = cast<DeclStmt>(S)->getDecl(); + Block->appendStmt(S); + + StmtIterator I(D); + return WalkAST_VisitDeclSubExprs(I); + } + + case Stmt::AddrLabelExprClass: { + AddrLabelExpr* A = cast<AddrLabelExpr>(S); + AddressTakenLabels.insert(A->getLabel()); + + if (AlwaysAddStmt) Block->appendStmt(S); + return Block; + } + + case Stmt::StmtExprClass: + return WalkAST_VisitStmtExpr(cast<StmtExpr>(S)); + + case Stmt::UnaryOperatorClass: { + UnaryOperator* U = cast<UnaryOperator>(S); + + // sizeof(expressions). For such expressions, + // the subexpression is not really evaluated, so + // we don't care about control-flow within the sizeof. + if (U->getOpcode() == UnaryOperator::SizeOf) { + Block->appendStmt(S); + return Block; + } + + break; + } + + case Stmt::BinaryOperatorClass: { + BinaryOperator* B = cast<BinaryOperator>(S); + + if (B->isLogicalOp()) { // && or || + CFGBlock* ConfluenceBlock = (Block) ? Block : createBlock(); + ConfluenceBlock->appendStmt(B); + FinishBlock(ConfluenceBlock); + + // create the block evaluating the LHS + CFGBlock* LHSBlock = createBlock(false); + LHSBlock->setTerminator(B); + + // create the block evaluating the RHS + Succ = ConfluenceBlock; + Block = NULL; + CFGBlock* RHSBlock = Visit(B->getRHS()); + + // Now link the LHSBlock with RHSBlock. 
+ if (B->getOpcode() == BinaryOperator::LOr) { + LHSBlock->addSuccessor(ConfluenceBlock); + LHSBlock->addSuccessor(RHSBlock); + } + else { + assert (B->getOpcode() == BinaryOperator::LAnd); + LHSBlock->addSuccessor(RHSBlock); + LHSBlock->addSuccessor(ConfluenceBlock); + } + + // Generate the blocks for evaluating the LHS. + Block = LHSBlock; + return addStmt(B->getLHS()); + } + else if (B->getOpcode() == BinaryOperator::Comma) { // , + Block->appendStmt(B); + addStmt(B->getRHS()); + return addStmt(B->getLHS()); + } + + break; + } + + case Stmt::ParenExprClass: + return WalkAST(cast<ParenExpr>(S)->getSubExpr(), AlwaysAddStmt); + + default: + break; + }; + + if (AlwaysAddStmt) Block->appendStmt(S); + return WalkAST_VisitChildren(S); +} + +/// WalkAST_VisitDeclSubExprs - Utility method to handle Decls contained in +/// DeclStmts. Because the initialization code (and sometimes the +/// the type declarations) for DeclStmts can contain arbitrary expressions, +/// we must linearize declarations to handle arbitrary control-flow induced by +/// those expressions. +CFGBlock* CFGBuilder::WalkAST_VisitDeclSubExprs(StmtIterator& I) { + if (I == StmtIterator()) + return Block; + + Stmt* S = *I; + ++I; + WalkAST_VisitDeclSubExprs(I); + + // Optimization: Don't create separate block-level statements for literals. + + switch (S->getStmtClass()) { + case Stmt::IntegerLiteralClass: + case Stmt::CharacterLiteralClass: + case Stmt::StringLiteralClass: + break; + + // All other cases. + + default: + Block = addStmt(S); + } + + return Block; +} + +/// WalkAST_VisitChildren - Utility method to call WalkAST on the +/// children of a Stmt. +CFGBlock* CFGBuilder::WalkAST_VisitChildren(Stmt* S) { + CFGBlock* B = Block; + for (Stmt::child_iterator I = S->child_begin(), E = S->child_end() ; + I != E; ++I) + if (*I) B = WalkAST(*I); + + return B; +} + +/// WalkAST_VisitStmtExpr - Utility method to handle (nested) statement +/// expressions (a GCC extension). 
+CFGBlock* CFGBuilder::WalkAST_VisitStmtExpr(StmtExpr* S) { + Block->appendStmt(S); + return VisitCompoundStmt(S->getSubStmt()); +} + +/// VisitStmt - Handle statements with no branching control flow. +CFGBlock* CFGBuilder::VisitStmt(Stmt* Statement) { + // We cannot assume that we are in the middle of a basic block, since + // the CFG might only be constructed for this single statement. If + // we have no current basic block, just create one lazily. + if (!Block) Block = createBlock(); + + // Simply add the statement to the current block. We actually + // insert statements in reverse order; this order is reversed later + // when processing the containing element in the AST. + addStmt(Statement); + + return Block; +} + +CFGBlock* CFGBuilder::VisitNullStmt(NullStmt* Statement) { + return Block; +} + +CFGBlock* CFGBuilder::VisitCompoundStmt(CompoundStmt* C) { + + for (CompoundStmt::reverse_body_iterator I=C->body_rbegin(), E=C->body_rend(); + I != E; ++I ) { + Visit(*I); + } + + return Block; +} + +CFGBlock* CFGBuilder::VisitIfStmt(IfStmt* I) { + // We may see an if statement in the middle of a basic block, or + // it may be the first statement we are processing. In either case, + // we create a new basic block. First, we create the blocks for + // the then...else statements, and then we create the block containing + // the if statement. If we were in the middle of a block, we + // stop processing that block and reverse its statements. That block + // is then the implicit successor for the "then" and "else" clauses. + + // The block we were proccessing is now finished. Make it the + // successor block. + if (Block) { + Succ = Block; + FinishBlock(Block); + } + + // Process the false branch. NULL out Block so that the recursive + // call to Visit will create a new basic block. 
+ // Null out Block so that all successor + CFGBlock* ElseBlock = Succ; + + if (Stmt* Else = I->getElse()) { + SaveAndRestore<CFGBlock*> sv(Succ); + + // NULL out Block so that the recursive call to Visit will + // create a new basic block. + Block = NULL; + ElseBlock = Visit(Else); + + if (!ElseBlock) // Can occur when the Else body has all NullStmts. + ElseBlock = sv.get(); + else if (Block) + FinishBlock(ElseBlock); + } + + // Process the true branch. NULL out Block so that the recursive + // call to Visit will create a new basic block. + // Null out Block so that all successor + CFGBlock* ThenBlock; + { + Stmt* Then = I->getThen(); + assert (Then); + SaveAndRestore<CFGBlock*> sv(Succ); + Block = NULL; + ThenBlock = Visit(Then); + + if (!ThenBlock) // Can occur when the Then body has all NullStmts. + ThenBlock = sv.get(); + else if (Block) + FinishBlock(ThenBlock); + } + + // Now create a new block containing the if statement. + Block = createBlock(false); + + // Set the terminator of the new block to the If statement. + Block->setTerminator(I); + + // Now add the successors. + Block->addSuccessor(ThenBlock); + Block->addSuccessor(ElseBlock); + + // Add the condition as the last statement in the new block. This + // may create new blocks as the condition may contain control-flow. Any + // newly created blocks will be pointed to be "Block". + return addStmt(I->getCond()->IgnoreParens()); +} + + +CFGBlock* CFGBuilder::VisitReturnStmt(ReturnStmt* R) { + // If we were in the middle of a block we stop processing that block + // and reverse its statements. + // + // NOTE: If a "return" appears in the middle of a block, this means + // that the code afterwards is DEAD (unreachable). We still + // keep a basic block for that code; a simple "mark-and-sweep" + // from the entry block will be able to report such dead + // blocks. + if (Block) FinishBlock(Block); + + // Create the new block. + Block = createBlock(false); + + // The Exit block is the only successor. 
+ Block->addSuccessor(&cfg->getExit()); + + // Add the return statement to the block. This may create new blocks + // if R contains control-flow (short-circuit operations). + return addStmt(R); +} + +CFGBlock* CFGBuilder::VisitLabelStmt(LabelStmt* L) { + // Get the block of the labeled statement. Add it to our map. + Visit(L->getSubStmt()); + CFGBlock* LabelBlock = Block; + + if (!LabelBlock) // This can happen when the body is empty, i.e. + LabelBlock=createBlock(); // scopes that only contains NullStmts. + + assert (LabelMap.find(L) == LabelMap.end() && "label already in map"); + LabelMap[ L ] = LabelBlock; + + // Labels partition blocks, so this is the end of the basic block + // we were processing (L is the block's label). Because this is + // label (and we have already processed the substatement) there is no + // extra control-flow to worry about. + LabelBlock->setLabel(L); + FinishBlock(LabelBlock); + + // We set Block to NULL to allow lazy creation of a new block + // (if necessary); + Block = NULL; + + // This block is now the implicit successor of other blocks. + Succ = LabelBlock; + + return LabelBlock; +} + +CFGBlock* CFGBuilder::VisitGotoStmt(GotoStmt* G) { + // Goto is a control-flow statement. Thus we stop processing the + // current block and create a new one. + if (Block) FinishBlock(Block); + Block = createBlock(false); + Block->setTerminator(G); + + // If we already know the mapping to the label block add the + // successor now. + LabelMapTy::iterator I = LabelMap.find(G->getLabel()); + + if (I == LabelMap.end()) + // We will need to backpatch this block later. + BackpatchBlocks.push_back(Block); + else + Block->addSuccessor(I->second); + + return Block; +} + +CFGBlock* CFGBuilder::VisitForStmt(ForStmt* F) { + // "for" is a control-flow statement. Thus we stop processing the + // current block. 
+ + CFGBlock* LoopSuccessor = NULL; + + if (Block) { + FinishBlock(Block); + LoopSuccessor = Block; + } + else LoopSuccessor = Succ; + + // Because of short-circuit evaluation, the condition of the loop + // can span multiple basic blocks. Thus we need the "Entry" and "Exit" + // blocks that evaluate the condition. + CFGBlock* ExitConditionBlock = createBlock(false); + CFGBlock* EntryConditionBlock = ExitConditionBlock; + + // Set the terminator for the "exit" condition block. + ExitConditionBlock->setTerminator(F); + + // Now add the actual condition to the condition block. Because the + // condition itself may contain control-flow, new blocks may be created. + if (Stmt* C = F->getCond()) { + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + if (Block) FinishBlock(EntryConditionBlock); + } + + // The condition block is the implicit successor for the loop body as + // well as any code above the loop. + Succ = EntryConditionBlock; + + // Now create the loop body. + { + assert (F->getBody()); + + // Save the current values for Block, Succ, and continue and break targets + SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ), + save_continue(ContinueTargetBlock), + save_break(BreakTargetBlock); + + // All continues within this loop should go to the condition block + ContinueTargetBlock = EntryConditionBlock; + + // All breaks should go to the code following the loop. + BreakTargetBlock = LoopSuccessor; + + // Create a new block to contain the (bottom) of the loop body. + Block = NULL; + + // If we have increment code, insert it at the end of the body block. + if (Stmt* I = F->getInc()) Block = addStmt(I); + + // Now populate the body block, and in the process create new blocks + // as we walk the body of the loop. 
+ CFGBlock* BodyBlock = Visit(F->getBody()); + + if (!BodyBlock) + BodyBlock = EntryConditionBlock; // can happen for "for (...;...; ) ;" + else if (Block) + FinishBlock(BodyBlock); + + // This new body block is a successor to our "exit" condition block. + ExitConditionBlock->addSuccessor(BodyBlock); + } + + // Link up the condition block with the code that follows the loop. + // (the false branch). + ExitConditionBlock->addSuccessor(LoopSuccessor); + + // If the loop contains initialization, create a new block for those + // statements. This block can also contain statements that precede + // the loop. + if (Stmt* I = F->getInit()) { + Block = createBlock(); + return addStmt(I); + } + else { + // There is no loop initialization. We are thus basically a while + // loop. NULL out Block to force lazy block construction. + Block = NULL; + Succ = EntryConditionBlock; + return EntryConditionBlock; + } +} + +CFGBlock* CFGBuilder::VisitWhileStmt(WhileStmt* W) { + // "while" is a control-flow statement. Thus we stop processing the + // current block. + + CFGBlock* LoopSuccessor = NULL; + + if (Block) { + FinishBlock(Block); + LoopSuccessor = Block; + } + else LoopSuccessor = Succ; + + // Because of short-circuit evaluation, the condition of the loop + // can span multiple basic blocks. Thus we need the "Entry" and "Exit" + // blocks that evaluate the condition. + CFGBlock* ExitConditionBlock = createBlock(false); + CFGBlock* EntryConditionBlock = ExitConditionBlock; + + // Set the terminator for the "exit" condition block. + ExitConditionBlock->setTerminator(W); + + // Now add the actual condition to the condition block. Because the + // condition itself may contain control-flow, new blocks may be created. + // Thus we update "Succ" after adding the condition. 
+ if (Stmt* C = W->getCond()) { + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + assert (Block == EntryConditionBlock); + if (Block) FinishBlock(EntryConditionBlock); + } + + // The condition block is the implicit successor for the loop body as + // well as any code above the loop. + Succ = EntryConditionBlock; + + // Process the loop body. + { + assert (W->getBody()); + + // Save the current values for Block, Succ, and continue and break targets + SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ), + save_continue(ContinueTargetBlock), + save_break(BreakTargetBlock); + + // All continues within this loop should go to the condition block + ContinueTargetBlock = EntryConditionBlock; + + // All breaks should go to the code following the loop. + BreakTargetBlock = LoopSuccessor; + + // NULL out Block to force lazy instantiation of blocks for the body. + Block = NULL; + + // Create the body. The returned block is the entry to the loop body. + CFGBlock* BodyBlock = Visit(W->getBody()); + + if (!BodyBlock) + BodyBlock = EntryConditionBlock; // can happen for "while(...) ;" + else if (Block) + FinishBlock(BodyBlock); + + // Add the loop body entry as a successor to the condition. + ExitConditionBlock->addSuccessor(BodyBlock); + } + + // Link up the condition block with the code that follows the loop. + // (the false branch). + ExitConditionBlock->addSuccessor(LoopSuccessor); + + // There can be no more statements in the condition block + // since we loop back to this block. NULL out Block to force + // lazy creation of another block. + Block = NULL; + + // Return the condition block, which is the dominating block for the loop. + Succ = EntryConditionBlock; + return EntryConditionBlock; +} + +CFGBlock* CFGBuilder::VisitDoStmt(DoStmt* D) { + // "do...while" is a control-flow statement. Thus we stop processing the + // current block. 
+ + CFGBlock* LoopSuccessor = NULL; + + if (Block) { + FinishBlock(Block); + LoopSuccessor = Block; + } + else LoopSuccessor = Succ; + + // Because of short-circuit evaluation, the condition of the loop + // can span multiple basic blocks. Thus we need the "Entry" and "Exit" + // blocks that evaluate the condition. + CFGBlock* ExitConditionBlock = createBlock(false); + CFGBlock* EntryConditionBlock = ExitConditionBlock; + + // Set the terminator for the "exit" condition block. + ExitConditionBlock->setTerminator(D); + + // Now add the actual condition to the condition block. Because the + // condition itself may contain control-flow, new blocks may be created. + if (Stmt* C = D->getCond()) { + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + if (Block) FinishBlock(EntryConditionBlock); + } + + // The condition block is the implicit successor for the loop body. + Succ = EntryConditionBlock; + + // Process the loop body. + CFGBlock* BodyBlock = NULL; + { + assert (D->getBody()); + + // Save the current values for Block, Succ, and continue and break targets + SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ), + save_continue(ContinueTargetBlock), + save_break(BreakTargetBlock); + + // All continues within this loop should go to the condition block + ContinueTargetBlock = EntryConditionBlock; + + // All breaks should go to the code following the loop. + BreakTargetBlock = LoopSuccessor; + + // NULL out Block to force lazy instantiation of blocks for the body. + Block = NULL; + + // Create the body. The returned block is the entry to the loop body. + BodyBlock = Visit(D->getBody()); + + if (!BodyBlock) + BodyBlock = EntryConditionBlock; // can happen for "do ; while(...)" + else if (Block) + FinishBlock(BodyBlock); + + // Add the loop body entry as a successor to the condition. + ExitConditionBlock->addSuccessor(BodyBlock); + } + + // Link up the condition block with the code that follows the loop. + // (the false branch). 
+ ExitConditionBlock->addSuccessor(LoopSuccessor); + + // There can be no more statements in the body block(s) + // since we loop back to the body. NULL out Block to force + // lazy creation of another block. + Block = NULL; + + // Return the loop body, which is the dominating block for the loop. + Succ = BodyBlock; + return BodyBlock; +} + +CFGBlock* CFGBuilder::VisitContinueStmt(ContinueStmt* C) { + // "continue" is a control-flow statement. Thus we stop processing the + // current block. + if (Block) FinishBlock(Block); + + // Now create a new block that ends with the continue statement. + Block = createBlock(false); + Block->setTerminator(C); + + // If there is no target for the continue, then we are looking at an + // incomplete AST. Handle this by not registering a successor. + if (ContinueTargetBlock) Block->addSuccessor(ContinueTargetBlock); + + return Block; +} + +CFGBlock* CFGBuilder::VisitBreakStmt(BreakStmt* B) { + // "break" is a control-flow statement. Thus we stop processing the + // current block. + if (Block) FinishBlock(Block); + + // Now create a new block that ends with the continue statement. + Block = createBlock(false); + Block->setTerminator(B); + + // If there is no target for the break, then we are looking at an + // incomplete AST. Handle this by not registering a successor. + if (BreakTargetBlock) Block->addSuccessor(BreakTargetBlock); + + return Block; +} + +CFGBlock* CFGBuilder::VisitSwitchStmt(SwitchStmt* S) { + // "switch" is a control-flow statement. Thus we stop processing the + // current block. + CFGBlock* SwitchSuccessor = NULL; + + if (Block) { + FinishBlock(Block); + SwitchSuccessor = Block; + } + else SwitchSuccessor = Succ; + + // Save the current "switch" context. + SaveAndRestore<CFGBlock*> save_switch(SwitchTerminatedBlock), + save_break(BreakTargetBlock), + save_default(DefaultCaseBlock); + + // Set the "default" case to be the block after the switch statement. 
+ // If the switch statement contains a "default:", this value will + // be overwritten with the block for that code. + DefaultCaseBlock = SwitchSuccessor; + + // Create a new block that will contain the switch statement. + SwitchTerminatedBlock = createBlock(false); + + // Now process the switch body. The code after the switch is the implicit + // successor. + Succ = SwitchSuccessor; + BreakTargetBlock = SwitchSuccessor; + + // When visiting the body, the case statements should automatically get + // linked up to the switch. We also don't keep a pointer to the body, + // since all control-flow from the switch goes to case/default statements. + assert (S->getBody() && "switch must contain a non-NULL body"); + Block = NULL; + CFGBlock *BodyBlock = Visit(S->getBody()); + if (Block) FinishBlock(BodyBlock); + + // If we have no "default:" case, the default transition is to the + // code following the switch body. + SwitchTerminatedBlock->addSuccessor(DefaultCaseBlock); + + // Add the terminator and condition in the switch block. + SwitchTerminatedBlock->setTerminator(S); + assert (S->getCond() && "switch condition must be non-NULL"); + Block = SwitchTerminatedBlock; + + return addStmt(S->getCond()); +} + +CFGBlock* CFGBuilder::VisitCaseStmt(CaseStmt* S) { + // CaseStmts are essentially labels, so they are the + // first statement in a block. + + if (S->getSubStmt()) Visit(S->getSubStmt()); + CFGBlock* CaseBlock = Block; + if (!CaseBlock) CaseBlock = createBlock(); + + // Cases statements partition blocks, so this is the top of + // the basic block we were processing (the "case XXX:" is the label). + CaseBlock->setLabel(S); + FinishBlock(CaseBlock); + + // Add this block to the list of successors for the block with the + // switch statement. 
+ assert (SwitchTerminatedBlock); + SwitchTerminatedBlock->addSuccessor(CaseBlock); + + // We set Block to NULL to allow lazy creation of a new block (if necessary) + Block = NULL; + + // This block is now the implicit successor of other blocks. + Succ = CaseBlock; + + return CaseBlock; +} + +CFGBlock* CFGBuilder::VisitDefaultStmt(DefaultStmt* S) { + if (S->getSubStmt()) Visit(S->getSubStmt()); + DefaultCaseBlock = Block; + if (!DefaultCaseBlock) DefaultCaseBlock = createBlock(); + + // Default statements partition blocks, so this is the top of + // the basic block we were processing (the "default:" is the label). + DefaultCaseBlock->setLabel(S); + FinishBlock(DefaultCaseBlock); + + // Unlike case statements, we don't add the default block to the + // successors for the switch statement immediately. This is done + // when we finish processing the switch statement. This allows for + // the default case (including a fall-through to the code after the + // switch statement) to always be the last successor of a switch-terminated + // block. + + // We set Block to NULL to allow lazy creation of a new block (if necessary) + Block = NULL; + + // This block is now the implicit successor of other blocks. + Succ = DefaultCaseBlock; + + return DefaultCaseBlock; +} + +CFGBlock* CFGBuilder::VisitIndirectGotoStmt(IndirectGotoStmt* I) { + // Lazily create the indirect-goto dispatch block if there isn't one + // already. + CFGBlock* IBlock = cfg->getIndirectGotoBlock(); + + if (!IBlock) { + IBlock = createBlock(false); + cfg->setIndirectGotoBlock(IBlock); + } + + // IndirectGoto is a control-flow statement. Thus we stop processing the + // current block and create a new one. + if (Block) FinishBlock(Block); + Block = createBlock(false); + Block->setTerminator(I); + Block->addSuccessor(IBlock); + return addStmt(I->getTarget()); +} + + +} // end anonymous namespace + +/// createBlock - Constructs and adds a new CFGBlock to the CFG. The +/// block has no successors or predecessors. 
If this is the first block +/// created in the CFG, it is automatically set to be the Entry and Exit +/// of the CFG. +CFGBlock* CFG::createBlock() { + bool first_block = begin() == end(); + + // Create the block. + Blocks.push_front(CFGBlock(NumBlockIDs++)); + + // If this is the first block, set it as the Entry and Exit. + if (first_block) Entry = Exit = &front(); + + // Return the block. + return &front(); +} + +/// buildCFG - Constructs a CFG from an AST. Ownership of the returned +/// CFG is returned to the caller. +CFG* CFG::buildCFG(Stmt* Statement) { + CFGBuilder Builder; + return Builder.buildCFG(Statement); +} + +/// reverseStmts - Reverses the orders of statements within a CFGBlock. +void CFGBlock::reverseStmts() { std::reverse(Stmts.begin(),Stmts.end()); } + +//===----------------------------------------------------------------------===// +// CFG: Queries for BlkExprs. +//===----------------------------------------------------------------------===// + +namespace { + typedef llvm::DenseMap<const Stmt*,unsigned> BlkExprMapTy; +} + +static void FindSubExprAssignments(Stmt* S, llvm::SmallPtrSet<Expr*,50>& Set) { + if (!S) + return; + + for (Stmt::child_iterator I=S->child_begin(), E=S->child_end(); I!=E; ++I) { + if (!*I) continue; + + if (BinaryOperator* B = dyn_cast<BinaryOperator>(*I)) + if (B->isAssignmentOp()) Set.insert(B); + + FindSubExprAssignments(*I, Set); + } +} + +static BlkExprMapTy* PopulateBlkExprMap(CFG& cfg) { + BlkExprMapTy* M = new BlkExprMapTy(); + + // Look for assignments that are used as subexpressions. These are the + // only assignments that we want to register as a block-level expression. 
+ llvm::SmallPtrSet<Expr*,50> SubExprAssignments; + + for (CFG::iterator I=cfg.begin(), E=cfg.end(); I != E; ++I) + for (CFGBlock::iterator BI=I->begin(), EI=I->end(); BI != EI; ++BI) + FindSubExprAssignments(*BI, SubExprAssignments); + + // Iterate over the statements again on identify the Expr* and Stmt* at + // the block-level that are block-level expressions. + for (CFG::iterator I=cfg.begin(), E=cfg.end(); I != E; ++I) + for (CFGBlock::iterator BI=I->begin(), EI=I->end(); BI != EI; ++BI) + if (Expr* E = dyn_cast<Expr>(*BI)) { + + if (BinaryOperator* B = dyn_cast<BinaryOperator>(E)) { + // Assignment expressions that are not nested within another + // expression are really "statements" whose value is never + // used by another expression. + if (B->isAssignmentOp() && !SubExprAssignments.count(E)) + continue; + } + else if (const StmtExpr* S = dyn_cast<StmtExpr>(E)) { + // Special handling for statement expressions. The last statement + // in the statement expression is also a block-level expr. + const CompoundStmt* C = S->getSubStmt(); + if (!C->body_empty()) { + unsigned x = M->size(); + (*M)[C->body_back()] = x; + } + } + + unsigned x = M->size(); + (*M)[E] = x; + } + + return M; +} + +CFG::BlkExprNumTy CFG::getBlkExprNum(const Stmt* S) { + assert(S != NULL); + if (!BlkExprMap) { BlkExprMap = (void*) PopulateBlkExprMap(*this); } + + BlkExprMapTy* M = reinterpret_cast<BlkExprMapTy*>(BlkExprMap); + BlkExprMapTy::iterator I = M->find(S); + + if (I == M->end()) return CFG::BlkExprNumTy(); + else return CFG::BlkExprNumTy(I->second); +} + +unsigned CFG::getNumBlkExprs() { + if (const BlkExprMapTy* M = reinterpret_cast<const BlkExprMapTy*>(BlkExprMap)) + return M->size(); + else { + // We assume callers interested in the number of BlkExprs will want + // the map constructed if it doesn't already exist. 
+ BlkExprMap = (void*) PopulateBlkExprMap(*this); + return reinterpret_cast<BlkExprMapTy*>(BlkExprMap)->size(); + } +} + +typedef std::set<std::pair<CFGBlock*,CFGBlock*> > BlkEdgeSetTy; + +const std::pair<CFGBlock*,CFGBlock*>* +CFG::getBlockEdgeImpl(const CFGBlock* B1, const CFGBlock* B2) { + + BlkEdgeSetTy*& p = reinterpret_cast<BlkEdgeSetTy*&>(BlkEdgeSet); + if (!p) p = new BlkEdgeSetTy(); + + return &*(p->insert(std::make_pair(const_cast<CFGBlock*>(B1), + const_cast<CFGBlock*>(B2))).first); +} + +CFG::~CFG() { + delete reinterpret_cast<const BlkExprMapTy*>(BlkExprMap); + delete reinterpret_cast<BlkEdgeSetTy*>(BlkEdgeSet); +} + +//===----------------------------------------------------------------------===// +// CFG pretty printing +//===----------------------------------------------------------------------===// + +namespace { + +class VISIBILITY_HIDDEN StmtPrinterHelper : public PrinterHelper { + + typedef llvm::DenseMap<Stmt*,std::pair<unsigned,unsigned> > StmtMapTy; + StmtMapTy StmtMap; + signed CurrentBlock; + unsigned CurrentStmt; + +public: + + StmtPrinterHelper(const CFG* cfg) : CurrentBlock(0), CurrentStmt(0) { + for (CFG::const_iterator I = cfg->begin(), E = cfg->end(); I != E; ++I ) { + unsigned j = 1; + for (CFGBlock::const_iterator BI = I->begin(), BEnd = I->end() ; + BI != BEnd; ++BI, ++j ) + StmtMap[*BI] = std::make_pair(I->getBlockID(),j); + } + } + + virtual ~StmtPrinterHelper() {} + + void setBlockID(signed i) { CurrentBlock = i; } + void setStmtID(unsigned i) { CurrentStmt = i; } + + virtual bool handledStmt(Stmt* S, std::ostream& OS) { + + StmtMapTy::iterator I = StmtMap.find(S); + + if (I == StmtMap.end()) + return false; + + if (CurrentBlock >= 0 && I->second.first == (unsigned) CurrentBlock + && I->second.second == CurrentStmt) + return false; + + OS << "[B" << I->second.first << "." 
<< I->second.second << "]"; + return true; + } +}; + +class VISIBILITY_HIDDEN CFGBlockTerminatorPrint + : public StmtVisitor<CFGBlockTerminatorPrint,void> { + + std::ostream& OS; + StmtPrinterHelper* Helper; +public: + CFGBlockTerminatorPrint(std::ostream& os, StmtPrinterHelper* helper) + : OS(os), Helper(helper) {} + + void VisitIfStmt(IfStmt* I) { + OS << "if "; + I->getCond()->printPretty(OS,Helper); + } + + // Default case. + void VisitStmt(Stmt* S) { S->printPretty(OS); } + + void VisitForStmt(ForStmt* F) { + OS << "for (" ; + if (F->getInit()) OS << "..."; + OS << "; "; + if (Stmt* C = F->getCond()) C->printPretty(OS,Helper); + OS << "; "; + if (F->getInc()) OS << "..."; + OS << ")"; + } + + void VisitWhileStmt(WhileStmt* W) { + OS << "while " ; + if (Stmt* C = W->getCond()) C->printPretty(OS,Helper); + } + + void VisitDoStmt(DoStmt* D) { + OS << "do ... while "; + if (Stmt* C = D->getCond()) C->printPretty(OS,Helper); + } + + void VisitSwitchStmt(SwitchStmt* S) { + OS << "switch "; + S->getCond()->printPretty(OS,Helper); + } + + void VisitConditionalOperator(ConditionalOperator* C) { + C->getCond()->printPretty(OS,Helper); + OS << " ? ... : ..."; + } + + void VisitChooseExpr(ChooseExpr* C) { + OS << "__builtin_choose_expr( "; + C->getCond()->printPretty(OS,Helper); + OS << " )"; + } + + void VisitIndirectGotoStmt(IndirectGotoStmt* I) { + OS << "goto *"; + I->getTarget()->printPretty(OS,Helper); + } + + void VisitBinaryOperator(BinaryOperator* B) { + if (!B->isLogicalOp()) { + VisitExpr(B); + return; + } + + B->getLHS()->printPretty(OS,Helper); + + switch (B->getOpcode()) { + case BinaryOperator::LOr: + OS << " || ..."; + return; + case BinaryOperator::LAnd: + OS << " && ..."; + return; + default: + assert(false && "Invalid logical operator."); + } + } + + void VisitExpr(Expr* E) { + E->printPretty(OS,Helper); + } +}; + + +void print_stmt(std::ostream&OS, StmtPrinterHelper* Helper, Stmt* S) { + if (Helper) { + // special printing for statement-expressions. 
+ if (StmtExpr* SE = dyn_cast<StmtExpr>(S)) { + CompoundStmt* Sub = SE->getSubStmt(); + + if (Sub->child_begin() != Sub->child_end()) { + OS << "({ ... ; "; + Helper->handledStmt(*SE->getSubStmt()->body_rbegin(),OS); + OS << " })\n"; + return; + } + } + + // special printing for comma expressions. + if (BinaryOperator* B = dyn_cast<BinaryOperator>(S)) { + if (B->getOpcode() == BinaryOperator::Comma) { + OS << "... , "; + Helper->handledStmt(B->getRHS(),OS); + OS << '\n'; + return; + } + } + } + + S->printPretty(OS, Helper); + + // Expressions need a newline. + if (isa<Expr>(S)) OS << '\n'; +} + +void print_block(std::ostream& OS, const CFG* cfg, const CFGBlock& B, + StmtPrinterHelper* Helper, bool print_edges) { + + if (Helper) Helper->setBlockID(B.getBlockID()); + + // Print the header. + OS << "\n [ B" << B.getBlockID(); + + if (&B == &cfg->getEntry()) + OS << " (ENTRY) ]\n"; + else if (&B == &cfg->getExit()) + OS << " (EXIT) ]\n"; + else if (&B == cfg->getIndirectGotoBlock()) + OS << " (INDIRECT GOTO DISPATCH) ]\n"; + else + OS << " ]\n"; + + // Print the label of this block. + if (Stmt* S = const_cast<Stmt*>(B.getLabel())) { + + if (print_edges) + OS << " "; + + if (LabelStmt* L = dyn_cast<LabelStmt>(S)) + OS << L->getName(); + else if (CaseStmt* C = dyn_cast<CaseStmt>(S)) { + OS << "case "; + C->getLHS()->printPretty(OS); + if (C->getRHS()) { + OS << " ... "; + C->getRHS()->printPretty(OS); + } + } + else if (isa<DefaultStmt>(S)) + OS << "default"; + else + assert(false && "Invalid label statement in CFGBlock."); + + OS << ":\n"; + } + + // Iterate through the statements in the block and print them. + unsigned j = 1; + + for (CFGBlock::const_iterator I = B.begin(), E = B.end() ; + I != E ; ++I, ++j ) { + + // Print the statement # in the basic block and the statement itself. + if (print_edges) + OS << " "; + + OS << std::setw(3) << j << ": "; + + if (Helper) + Helper->setStmtID(j); + + print_stmt(OS,Helper,*I); + } + + // Print the terminator of this block. 
+ if (B.getTerminator()) { + if (print_edges) + OS << " "; + + OS << " T: "; + + if (Helper) Helper->setBlockID(-1); + + CFGBlockTerminatorPrint TPrinter(OS,Helper); + TPrinter.Visit(const_cast<Stmt*>(B.getTerminator())); + OS << '\n'; + } + + if (print_edges) { + // Print the predecessors of this block. + OS << " Predecessors (" << B.pred_size() << "):"; + unsigned i = 0; + + for (CFGBlock::const_pred_iterator I = B.pred_begin(), E = B.pred_end(); + I != E; ++I, ++i) { + + if (i == 8 || (i-8) == 0) + OS << "\n "; + + OS << " B" << (*I)->getBlockID(); + } + + OS << '\n'; + + // Print the successors of this block. + OS << " Successors (" << B.succ_size() << "):"; + i = 0; + + for (CFGBlock::const_succ_iterator I = B.succ_begin(), E = B.succ_end(); + I != E; ++I, ++i) { + + if (i == 8 || (i-8) % 10 == 0) + OS << "\n "; + + OS << " B" << (*I)->getBlockID(); + } + + OS << '\n'; + } +} + +} // end anonymous namespace + +/// dump - A simple pretty printer of a CFG that outputs to stderr. +void CFG::dump() const { print(*llvm::cerr.stream()); } + +/// print - A simple pretty printer of a CFG that outputs to an ostream. +void CFG::print(std::ostream& OS) const { + + StmtPrinterHelper Helper(this); + + // Print the entry block. + print_block(OS, this, getEntry(), &Helper, true); + + // Iterate through the CFGBlocks and print them one by one. + for (const_iterator I = Blocks.begin(), E = Blocks.end() ; I != E ; ++I) { + // Skip the entry block, because we already printed it. + if (&(*I) == &getEntry() || &(*I) == &getExit()) + continue; + + print_block(OS, this, *I, &Helper, true); + } + + // Print the exit block. + print_block(OS, this, getExit(), &Helper, true); +} + +/// dump - A simply pretty printer of a CFGBlock that outputs to stderr. +void CFGBlock::dump(const CFG* cfg) const { print(*llvm::cerr.stream(), cfg); } + +/// print - A simple pretty printer of a CFGBlock that outputs to an ostream. +/// Generally this will only be called from CFG::print. 
+void CFGBlock::print(std::ostream& OS, const CFG* cfg) const { + StmtPrinterHelper Helper(cfg); + print_block(OS, cfg, *this, &Helper, true); +} + +/// printTerminator - A simple pretty printer of the terminator of a CFGBlock. +void CFGBlock::printTerminator(std::ostream& OS) const { + CFGBlockTerminatorPrint TPrinter(OS,NULL); + TPrinter.Visit(const_cast<Stmt*>(getTerminator())); +} + + +//===----------------------------------------------------------------------===// +// CFG Graphviz Visualization +//===----------------------------------------------------------------------===// + + +#ifndef NDEBUG +static StmtPrinterHelper* GraphHelper; +#endif + +void CFG::viewCFG() const { +#ifndef NDEBUG + StmtPrinterHelper H(this); + GraphHelper = &H; + llvm::ViewGraph(this,"CFG"); + GraphHelper = NULL; +#else + std::cerr << "CFG::viewCFG is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif +} + +namespace llvm { +template<> +struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits { + static std::string getNodeLabel(const CFGBlock* Node, const CFG* Graph) { + +#ifndef NDEBUG + std::ostringstream Out; + print_block(Out,Graph, *Node, GraphHelper, false); + std::string OutStr = Out.str(); + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + + return OutStr; +#else + return ""; +#endif + } +}; +} // end namespace llvm diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp new file mode 100644 index 00000000000..7fa679cbc08 --- /dev/null +++ b/clang/lib/AST/Decl.cpp @@ -0,0 +1,652 @@ +//===--- Decl.cpp - Declaration AST Node Implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the Decl class and subclasses. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/Basic/IdentifierTable.h" +#include "llvm/ADT/DenseMap.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Statistics +//===----------------------------------------------------------------------===// + +// temporary statistics gathering +static unsigned nFuncs = 0; +static unsigned nBlockVars = 0; +static unsigned nFileVars = 0; +static unsigned nParmVars = 0; +static unsigned nSUC = 0; +static unsigned nEnumConst = 0; +static unsigned nEnumDecls = 0; +static unsigned nTypedef = 0; +static unsigned nFieldDecls = 0; +static unsigned nInterfaceDecls = 0; +static unsigned nClassDecls = 0; +static unsigned nMethodDecls = 0; +static unsigned nProtocolDecls = 0; +static unsigned nForwardProtocolDecls = 0; +static unsigned nCategoryDecls = 0; +static unsigned nIvarDecls = 0; +static unsigned nObjCImplementationDecls = 0; +static unsigned nObjCCategoryImpl = 0; +static unsigned nObjCCompatibleAlias = 0; +static unsigned nObjCPropertyDecl = 0; +static unsigned nLinkageSpecDecl = 0; +static unsigned nFileScopeAsmDecl = 0; + +static bool StatSwitch = false; + +// This keeps track of all decl attributes. Since so few decls have attrs, we +// keep them in a hash map instead of wasting space in the Decl class. 
+typedef llvm::DenseMap<const Decl*, Attr*> DeclAttrMapTy; + +static DeclAttrMapTy *DeclAttrs = 0; + +const char *Decl::getDeclKindName() const { + switch (DeclKind) { + default: assert(0 && "Unknown decl kind!"); + case Typedef: return "Typedef"; + case Function: return "Function"; + case BlockVar: return "BlockVar"; + case FileVar: return "FileVar"; + case ParmVar: return "ParmVar"; + case EnumConstant: return "EnumConstant"; + case ObjCInterface: return "ObjCInterface"; + case ObjCClass: return "ObjCClass"; + case ObjCMethod: return "ObjCMethod"; + case ObjCProtocol: return "ObjCProtocol"; + case ObjCForwardProtocol: return "ObjCForwardProtocol"; + case Struct: return "Struct"; + case Union: return "Union"; + case Class: return "Class"; + case Enum: return "Enum"; + } +} + +bool Decl::CollectingStats(bool Enable) { + if (Enable) + StatSwitch = true; + return StatSwitch; +} + +void Decl::PrintStats() { + fprintf(stderr, "*** Decl Stats:\n"); + fprintf(stderr, " %d decls total.\n", + int(nFuncs+nBlockVars+nFileVars+nParmVars+nFieldDecls+nSUC+ + nEnumDecls+nEnumConst+nTypedef+nInterfaceDecls+nClassDecls+ + nMethodDecls+nProtocolDecls+nCategoryDecls+nIvarDecls)); + fprintf(stderr, " %d function decls, %d each (%d bytes)\n", + nFuncs, (int)sizeof(FunctionDecl), int(nFuncs*sizeof(FunctionDecl))); + fprintf(stderr, " %d block variable decls, %d each (%d bytes)\n", + nBlockVars, (int)sizeof(BlockVarDecl), + int(nBlockVars*sizeof(BlockVarDecl))); + fprintf(stderr, " %d file variable decls, %d each (%d bytes)\n", + nFileVars, (int)sizeof(FileVarDecl), + int(nFileVars*sizeof(FileVarDecl))); + fprintf(stderr, " %d parameter variable decls, %d each (%d bytes)\n", + nParmVars, (int)sizeof(ParmVarDecl), + int(nParmVars*sizeof(ParmVarDecl))); + fprintf(stderr, " %d field decls, %d each (%d bytes)\n", + nFieldDecls, (int)sizeof(FieldDecl), + int(nFieldDecls*sizeof(FieldDecl))); + fprintf(stderr, " %d struct/union/class decls, %d each (%d bytes)\n", + nSUC, 
(int)sizeof(RecordDecl), + int(nSUC*sizeof(RecordDecl))); + fprintf(stderr, " %d enum decls, %d each (%d bytes)\n", + nEnumDecls, (int)sizeof(EnumDecl), + int(nEnumDecls*sizeof(EnumDecl))); + fprintf(stderr, " %d enum constant decls, %d each (%d bytes)\n", + nEnumConst, (int)sizeof(EnumConstantDecl), + int(nEnumConst*sizeof(EnumConstantDecl))); + fprintf(stderr, " %d typedef decls, %d each (%d bytes)\n", + nTypedef, (int)sizeof(TypedefDecl),int(nTypedef*sizeof(TypedefDecl))); + // Objective-C decls... + fprintf(stderr, " %d interface decls, %d each (%d bytes)\n", + nInterfaceDecls, (int)sizeof(ObjCInterfaceDecl), + int(nInterfaceDecls*sizeof(ObjCInterfaceDecl))); + fprintf(stderr, " %d instance variable decls, %d each (%d bytes)\n", + nIvarDecls, (int)sizeof(ObjCIvarDecl), + int(nIvarDecls*sizeof(ObjCIvarDecl))); + fprintf(stderr, " %d class decls, %d each (%d bytes)\n", + nClassDecls, (int)sizeof(ObjCClassDecl), + int(nClassDecls*sizeof(ObjCClassDecl))); + fprintf(stderr, " %d method decls, %d each (%d bytes)\n", + nMethodDecls, (int)sizeof(ObjCMethodDecl), + int(nMethodDecls*sizeof(ObjCMethodDecl))); + fprintf(stderr, " %d protocol decls, %d each (%d bytes)\n", + nProtocolDecls, (int)sizeof(ObjCProtocolDecl), + int(nProtocolDecls*sizeof(ObjCProtocolDecl))); + fprintf(stderr, " %d forward protocol decls, %d each (%d bytes)\n", + nForwardProtocolDecls, (int)sizeof(ObjCForwardProtocolDecl), + int(nForwardProtocolDecls*sizeof(ObjCForwardProtocolDecl))); + fprintf(stderr, " %d category decls, %d each (%d bytes)\n", + nCategoryDecls, (int)sizeof(ObjCCategoryDecl), + int(nCategoryDecls*sizeof(ObjCCategoryDecl))); + + fprintf(stderr, " %d class implementation decls, %d each (%d bytes)\n", + nObjCImplementationDecls, (int)sizeof(ObjCImplementationDecl), + int(nObjCImplementationDecls*sizeof(ObjCImplementationDecl))); + + fprintf(stderr, " %d class implementation decls, %d each (%d bytes)\n", + nObjCCategoryImpl, (int)sizeof(ObjCCategoryImplDecl), + 
int(nObjCCategoryImpl*sizeof(ObjCCategoryImplDecl))); + + fprintf(stderr, " %d compatibility alias decls, %d each (%d bytes)\n", + nObjCCompatibleAlias, (int)sizeof(ObjCCompatibleAliasDecl), + int(nObjCCompatibleAlias*sizeof(ObjCCompatibleAliasDecl))); + + fprintf(stderr, " %d property decls, %d each (%d bytes)\n", + nObjCPropertyDecl, (int)sizeof(ObjCPropertyDecl), + int(nObjCPropertyDecl*sizeof(ObjCPropertyDecl))); + + fprintf(stderr, "Total bytes = %d\n", + int(nFuncs*sizeof(FunctionDecl)+nBlockVars*sizeof(BlockVarDecl)+ + nFileVars*sizeof(FileVarDecl)+nParmVars*sizeof(ParmVarDecl)+ + nFieldDecls*sizeof(FieldDecl)+nSUC*sizeof(RecordDecl)+ + nEnumDecls*sizeof(EnumDecl)+nEnumConst*sizeof(EnumConstantDecl)+ + nTypedef*sizeof(TypedefDecl)+ + nInterfaceDecls*sizeof(ObjCInterfaceDecl)+ + nIvarDecls*sizeof(ObjCIvarDecl)+ + nClassDecls*sizeof(ObjCClassDecl)+ + nMethodDecls*sizeof(ObjCMethodDecl)+ + nProtocolDecls*sizeof(ObjCProtocolDecl)+ + nForwardProtocolDecls*sizeof(ObjCForwardProtocolDecl)+ + nCategoryDecls*sizeof(ObjCCategoryDecl)+ + nObjCImplementationDecls*sizeof(ObjCImplementationDecl)+ + nObjCCategoryImpl*sizeof(ObjCCategoryImplDecl)+ + nObjCCompatibleAlias*sizeof(ObjCCompatibleAliasDecl)+ + nObjCPropertyDecl*sizeof(ObjCPropertyDecl)+ + nLinkageSpecDecl*sizeof(LinkageSpecDecl)+ + nFileScopeAsmDecl*sizeof(FileScopeAsmDecl))); + +} + +void Decl::addDeclKind(Kind k) { + switch (k) { + case Typedef: nTypedef++; break; + case Function: nFuncs++; break; + case BlockVar: nBlockVars++; break; + case FileVar: nFileVars++; break; + case ParmVar: nParmVars++; break; + case EnumConstant: nEnumConst++; break; + case Field: nFieldDecls++; break; + case Struct: case Union: case Class: nSUC++; break; + case Enum: nEnumDecls++; break; + case ObjCInterface: nInterfaceDecls++; break; + case ObjCClass: nClassDecls++; break; + case ObjCMethod: nMethodDecls++; break; + case ObjCProtocol: nProtocolDecls++; break; + case ObjCForwardProtocol: nForwardProtocolDecls++; break; + case 
ObjCCategory: nCategoryDecls++; break; + case ObjCIvar: nIvarDecls++; break; + case ObjCImplementation: nObjCImplementationDecls++; break; + case ObjCCategoryImpl: nObjCCategoryImpl++; break; + case CompatibleAlias: nObjCCompatibleAlias++; break; + case PropertyDecl: nObjCPropertyDecl++; break; + case LinkageSpec: nLinkageSpecDecl++; break; + case FileScopeAsm: nFileScopeAsmDecl++; break; + } +} + +//===----------------------------------------------------------------------===// +// Decl Allocation/Deallocation Method Implementations +//===----------------------------------------------------------------------===// + +BlockVarDecl *BlockVarDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, + StorageClass S, ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<BlockVarDecl>(); + return new (Mem) BlockVarDecl(L, Id, T, S, PrevDecl); +} + + +FileVarDecl *FileVarDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, StorageClass S, + ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<FileVarDecl>(); + return new (Mem) FileVarDecl(L, Id, T, S, PrevDecl); +} + +ParmVarDecl *ParmVarDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, StorageClass S, + ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<ParmVarDecl>(); + return new (Mem) ParmVarDecl(L, Id, T, S, PrevDecl); +} + +FunctionDecl *FunctionDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, + StorageClass S, bool isInline, + ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<FunctionDecl>(); + return new (Mem) FunctionDecl(L, Id, T, S, isInline, PrevDecl); +} + + +EnumConstantDecl *EnumConstantDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, + Expr *E, const llvm::APSInt &V, + ScopedDecl *PrevDecl){ + void *Mem = C.getAllocator().Allocate<EnumConstantDecl>(); + return new (Mem) EnumConstantDecl(L, Id, T, E, V, PrevDecl); +} + 
+TypedefDecl *TypedefDecl::Create(ASTContext &C, SourceLocation L, + IdentifierInfo *Id, QualType T, + ScopedDecl *PD) { + void *Mem = C.getAllocator().Allocate<TypedefDecl>(); + return new (Mem) TypedefDecl(L, Id, T, PD); +} + +EnumDecl *EnumDecl::Create(ASTContext &C, SourceLocation L, IdentifierInfo *Id, + ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<EnumDecl>(); + return new (Mem) EnumDecl(L, Id, PrevDecl); +} + +RecordDecl *RecordDecl::Create(ASTContext &C, Kind DK, SourceLocation L, + IdentifierInfo *Id, ScopedDecl *PrevDecl) { + void *Mem = C.getAllocator().Allocate<RecordDecl>(); + return new (Mem) RecordDecl(DK, L, Id, PrevDecl); +} + + +//===----------------------------------------------------------------------===// +// Decl Implementation +//===----------------------------------------------------------------------===// + +// Out-of-line virtual method providing a home for Decl. +Decl::~Decl() { + if (!HasAttrs) + return; + + DeclAttrMapTy::iterator it = DeclAttrs->find(this); + assert(it != DeclAttrs->end() && "No attrs found but HasAttrs is true!"); + + delete it->second; + DeclAttrs->erase(it); + if (DeclAttrs->empty()) { + delete DeclAttrs; + DeclAttrs = 0; + } +} + +void Decl::addAttr(Attr *NewAttr) { + if (!DeclAttrs) + DeclAttrs = new llvm::DenseMap<const Decl*, Attr*>(); + + Attr *&ExistingAttr = (*DeclAttrs)[this]; + + NewAttr->setNext(ExistingAttr); + ExistingAttr = NewAttr; + + HasAttrs = true; +} + +const Attr *Decl::getAttrs() const { + if (!HasAttrs) + return 0; + + return (*DeclAttrs)[this]; +} + +const char *NamedDecl::getName() const { + if (const IdentifierInfo *II = getIdentifier()) + return II->getName(); + return ""; +} + +FunctionDecl::~FunctionDecl() { + delete[] ParamInfo; +} + +unsigned FunctionDecl::getNumParams() const { + if (isa<FunctionTypeNoProto>(getCanonicalType())) + return 0; + return cast<FunctionTypeProto>(getCanonicalType())->getNumArgs(); +} + +void FunctionDecl::setParams(ParmVarDecl 
**NewParamInfo, unsigned NumParams) { + assert(ParamInfo == 0 && "Already has param info!"); + assert(NumParams == getNumParams() && "Parameter count mismatch!"); + + // Zero params -> null pointer. + if (NumParams) { + ParamInfo = new ParmVarDecl*[NumParams]; + memcpy(ParamInfo, NewParamInfo, sizeof(ParmVarDecl*)*NumParams); + } +} + + +/// defineBody - When created, RecordDecl's correspond to a forward declared +/// record. This method is used to mark the decl as being defined, with the +/// specified contents. +void RecordDecl::defineBody(FieldDecl **members, unsigned numMembers) { + assert(!isDefinition() && "Cannot redefine record!"); + setDefinition(true); + NumMembers = numMembers; + if (numMembers) { + Members = new FieldDecl*[numMembers]; + memcpy(Members, members, numMembers*sizeof(Decl*)); + } +} + +FieldDecl* RecordDecl::getMember(IdentifierInfo *name) { + if (Members == 0 || NumMembers < 0) + return 0; + + // linear search. When C++ classes come along, will likely need to revisit. + for (int i = 0; i < NumMembers; ++i) { + if (Members[i]->getIdentifier() == name) + return Members[i]; + } + return 0; +} + +//===----------------------------------------------------------------------===// +// Objective-C Decl Implementation +//===----------------------------------------------------------------------===// + +void ObjCMethodDecl::setMethodParams(ParmVarDecl **NewParamInfo, + unsigned NumParams) { + assert(ParamInfo == 0 && "Already has param info!"); + + // Zero params -> null pointer. + if (NumParams) { + ParamInfo = new ParmVarDecl*[NumParams]; + memcpy(ParamInfo, NewParamInfo, sizeof(ParmVarDecl*)*NumParams); + NumMethodParams = NumParams; + } +} + +ObjCMethodDecl::~ObjCMethodDecl() { + delete[] ParamInfo; +} + +/// ObjCAddInstanceVariablesToClass - Inserts instance variables +/// into ObjCInterfaceDecl's fields. 
+///
+void ObjCInterfaceDecl::addInstanceVariablesToClass(ObjCIvarDecl **ivars,
+                                                     unsigned numIvars,
+                                                     SourceLocation RBrac) {
+  NumIvars = numIvars;
+  // An interface without ivars leaves Ivars untouched.
+  if (numIvars != 0) {
+    const size_t NumBytes = sizeof(ObjCIvarDecl*) * numIvars;
+    Ivars = new ObjCIvarDecl*[numIvars];
+    memcpy(Ivars, ivars, NumBytes);
+  }
+  setLocEnd(RBrac);
+}
+
+/// ObjCAddInstanceVariablesToClassImpl - Record the instance variables
+/// declared in an @implementation by copying the 'numIvars' pointers in
+/// 'ivars' into ObjCImplementationDecl's own Ivars array.
+///
+void ObjCImplementationDecl::ObjCAddInstanceVariablesToClassImpl(
+                               ObjCIvarDecl **ivars, unsigned numIvars) {
+  NumIvars = numIvars;
+  if (numIvars == 0)
+    return;
+
+  const size_t NumBytes = sizeof(ObjCIvarDecl*) * numIvars;
+  Ivars = new ObjCIvarDecl*[numIvars];
+  memcpy(Ivars, ivars, NumBytes);
+}
+
+/// addMethods - Copy the instance and class method declarations into
+/// ObjCInterfaceDecl's InstanceMethods and ClassMethods arrays, and remember
+/// the location that ends the interface.
+///
+void ObjCInterfaceDecl::addMethods(ObjCMethodDecl **insMethods,
+                                   unsigned numInsMembers,
+                                   ObjCMethodDecl **clsMethods,
+                                   unsigned numClsMembers,
+                                   SourceLocation endLoc) {
+  // Instance methods.
+  NumInstanceMethods = numInsMembers;
+  if (numInsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numInsMembers;
+    InstanceMethods = new ObjCMethodDecl*[numInsMembers];
+    memcpy(InstanceMethods, insMethods, NumBytes);
+  }
+
+  // Class methods.
+  NumClassMethods = numClsMembers;
+  if (numClsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numClsMembers;
+    ClassMethods = new ObjCMethodDecl*[numClsMembers];
+    memcpy(ClassMethods, clsMethods, NumBytes);
+  }
+
+  AtEndLoc = endLoc;
+}
+
+/// addMethods - Insert instance and methods declarations into
+/// ObjCProtocolDecl's ProtoInsMethods and ProtoClsMethods fields.
+///
+void ObjCProtocolDecl::addMethods(ObjCMethodDecl **insMethods,
+                                  unsigned numInsMembers,
+                                  ObjCMethodDecl **clsMethods,
+                                  unsigned numClsMembers,
+                                  SourceLocation endLoc) {
+  // Instance methods.
+  NumInstanceMethods = numInsMembers;
+  if (numInsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numInsMembers;
+    InstanceMethods = new ObjCMethodDecl*[numInsMembers];
+    memcpy(InstanceMethods, insMethods, NumBytes);
+  }
+
+  // Class methods.
+  NumClassMethods = numClsMembers;
+  if (numClsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numClsMembers;
+    ClassMethods = new ObjCMethodDecl*[numClsMembers];
+    memcpy(ClassMethods, clsMethods, NumBytes);
+  }
+
+  AtEndLoc = endLoc;
+}
+
+/// addMethods - Copy the instance and class method declarations into
+/// ObjCCategoryDecl's InstanceMethods and ClassMethods arrays, and remember
+/// the location that ends the category.
+///
+void ObjCCategoryDecl::addMethods(ObjCMethodDecl **insMethods,
+                                  unsigned numInsMembers,
+                                  ObjCMethodDecl **clsMethods,
+                                  unsigned numClsMembers,
+                                  SourceLocation endLoc) {
+  // Instance methods.
+  NumInstanceMethods = numInsMembers;
+  if (numInsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numInsMembers;
+    InstanceMethods = new ObjCMethodDecl*[numInsMembers];
+    memcpy(InstanceMethods, insMethods, NumBytes);
+  }
+
+  // Class methods.
+  NumClassMethods = numClsMembers;
+  if (numClsMembers != 0) {
+    const size_t NumBytes = sizeof(ObjCMethodDecl*) * numClsMembers;
+    ClassMethods = new ObjCMethodDecl*[numClsMembers];
+    memcpy(ClassMethods, clsMethods, NumBytes);
+  }
+
+  AtEndLoc = endLoc;
+}
+
+/// lookupInstanceVariable - Search this class and then each of its super
+/// classes for an ivar named ID.  On success the declaring class is stored in
+/// clsDeclared and the ivar is returned; otherwise clsDeclared is left alone
+/// and null is returned.
+ObjCIvarDecl *ObjCInterfaceDecl::lookupInstanceVariable(
+  IdentifierInfo *ID, ObjCInterfaceDecl *&clsDeclared) {
+  for (ObjCInterfaceDecl *Cls = this; Cls != NULL;
+       Cls = Cls->getSuperClass()) {
+    ivar_iterator Cur = Cls->ivar_begin(), End = Cls->ivar_end();
+    for (; Cur != End; ++Cur) {
+      if ((*Cur)->getIdentifier() != ID)
+        continue;
+      clsDeclared = Cls;
+      return *Cur;
+    }
+  }
+  return NULL;
+}
+
+/// lookupInstanceMethod - This method returns an instance method by looking in
+/// the class, its categories, and its super classes (using a linear search).
+ObjCMethodDecl *ObjCInterfaceDecl::lookupInstanceMethod(Selector Sel) {
+  for (ObjCInterfaceDecl *Cls = this; Cls != NULL;
+       Cls = Cls->getSuperClass()) {
+    // The class's own instance methods come first.
+    if (ObjCMethodDecl *Found = Cls->getInstanceMethod(Sel))
+      return Found;
+
+    // Then the protocols the class references.
+    ObjCProtocolDecl **Protos = Cls->getReferencedProtocols();
+    int NumProtos = Cls->getNumIntfRefProtocols();
+    for (int Idx = 0; Idx < NumProtos; ++Idx)
+      if (ObjCMethodDecl *Found = Protos[Idx]->getInstanceMethod(Sel))
+        return Found;
+
+    // Then every category on the class's category list.
+    for (ObjCCategoryDecl *Cat = Cls->getCategoryList(); Cat;
+         Cat = Cat->getNextClassCategory())
+      if (ObjCMethodDecl *Found = Cat->getInstanceMethod(Sel))
+        return Found;
+  }
+  return NULL;
+}
+
+// lookupClassMethod - Return the class method for Sel, searching the class,
+// its referenced protocols, its categories and then its super classes in
+// that order (linear search).  Returns null when nothing matches.
+ObjCMethodDecl *ObjCInterfaceDecl::lookupClassMethod(Selector Sel) {
+  for (ObjCInterfaceDecl *Cls = this; Cls != NULL;
+       Cls = Cls->getSuperClass()) {
+    // The class's own class methods come first.
+    if (ObjCMethodDecl *Found = Cls->getClassMethod(Sel))
+      return Found;
+
+    // Then the protocols the class references.
+    ObjCProtocolDecl **Protos = Cls->getReferencedProtocols();
+    int NumProtos = Cls->getNumIntfRefProtocols();
+    for (int Idx = 0; Idx < NumProtos; ++Idx)
+      if (ObjCMethodDecl *Found = Protos[Idx]->getClassMethod(Sel))
+        return Found;
+
+    // Then every category on the class's category list.
+    for (ObjCCategoryDecl *Cat = Cls->getCategoryList(); Cat;
+         Cat = Cat->getNextClassCategory())
+      if (ObjCMethodDecl *Found = Cat->getClassMethod(Sel))
+        return Found;
+  }
+  return NULL;
+}
+
+/// getInstanceMethod - Return the instance method with selector Sel declared
+/// in this class implementation, or null.  Unlike interfaces, nothing outside
+/// the implementation is searched.
+ObjCMethodDecl *ObjCImplementationDecl::getInstanceMethod(Selector Sel) {
+  instmeth_iterator Cur = instmeth_begin(), End = instmeth_end();
+  while (Cur != End) {
+    if ((*Cur)->getSelector() == Sel)
+      return *Cur;
+    ++Cur;
+  }
+  return NULL;
+}
+
+/// getClassMethod - Return the class method with selector Sel declared in
+/// this class implementation, or null.  Unlike interfaces, nothing outside
+/// the implementation is searched.
+ObjCMethodDecl *ObjCImplementationDecl::getClassMethod(Selector Sel) {
+  classmeth_iterator Cur = classmeth_begin(), End = classmeth_end();
+  while (Cur != End) {
+    if ((*Cur)->getSelector() == Sel)
+      return *Cur;
+    ++Cur;
+  }
+  return NULL;
+}
+
+// getInstanceMethod - Return the instance method with selector Sel declared
+// in this category implementation, or null.  Nothing outside the category
+// implementation is searched.
+ObjCMethodDecl *ObjCCategoryImplDecl::getInstanceMethod(Selector Sel) {
+  instmeth_iterator Cur = instmeth_begin(), End = instmeth_end();
+  while (Cur != End) {
+    if ((*Cur)->getSelector() == Sel)
+      return *Cur;
+    ++Cur;
+  }
+  return NULL;
+}
+
+// getClassMethod - Return the class method with selector Sel declared in
+// this category implementation, or null.  Nothing outside the category
+// implementation is searched.
+ObjCMethodDecl *ObjCCategoryImplDecl::getClassMethod(Selector Sel) {
+  classmeth_iterator Cur = classmeth_begin(), End = classmeth_end();
+  while (Cur != End) {
+    if ((*Cur)->getSelector() == Sel)
+      return *Cur;
+    ++Cur;
+  }
+  return NULL;
+}
+
+// lookupInstanceMethod - Lookup a instance method in the protocol and protocols
+// it inherited.
+ObjCMethodDecl *ObjCProtocolDecl::lookupInstanceMethod(Selector Sel) { + ObjCMethodDecl *MethodDecl = NULL; + + if ((MethodDecl = getInstanceMethod(Sel))) + return MethodDecl; + + if (getNumReferencedProtocols() > 0) { + ObjCProtocolDecl **RefPDecl = getReferencedProtocols(); + + for (unsigned i = 0; i < getNumReferencedProtocols(); i++) { + if ((MethodDecl = RefPDecl[i]->getInstanceMethod(Sel))) + return MethodDecl; + } + } + return NULL; +} + +// lookupInstanceMethod - Lookup a class method in the protocol and protocols +// it inherited. +ObjCMethodDecl *ObjCProtocolDecl::lookupClassMethod(Selector Sel) { + ObjCMethodDecl *MethodDecl = NULL; + + if ((MethodDecl = getClassMethod(Sel))) + return MethodDecl; + + if (getNumReferencedProtocols() > 0) { + ObjCProtocolDecl **RefPDecl = getReferencedProtocols(); + + for(unsigned i = 0; i < getNumReferencedProtocols(); i++) { + if ((MethodDecl = RefPDecl[i]->getClassMethod(Sel))) + return MethodDecl; + } + } + return NULL; +} + +/// getSynthesizedMethodSize - Compute size of synthesized method name +/// as done be the rewrite. +/// +unsigned ObjCMethodDecl::getSynthesizedMethodSize() const { + // syntesized method name is a concatenation of -/+[class-name selector] + // Get length of this name. 
+ unsigned length = 3; // _I_ or _C_ + length += strlen(getClassInterface()->getName()) +1; // extra for _ + NamedDecl *MethodContext = getMethodContext(); + if (ObjCCategoryImplDecl *CID = + dyn_cast<ObjCCategoryImplDecl>(MethodContext)) + length += strlen(CID->getName()) +1; + length += getSelector().getName().size(); // selector name + return length; +} + +ObjCInterfaceDecl *const ObjCMethodDecl::getClassInterface() const { + if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(MethodContext)) + return ID; + if (ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(MethodContext)) + return CD->getClassInterface(); + if (ObjCImplementationDecl *IMD = + dyn_cast<ObjCImplementationDecl>(MethodContext)) + return IMD->getClassInterface(); + if (ObjCCategoryImplDecl *CID = + dyn_cast<ObjCCategoryImplDecl>(MethodContext)) + return CID->getClassInterface(); + assert(false && "unknown method context"); + return 0; +} diff --git a/clang/lib/AST/DeclSerialization.cpp b/clang/lib/AST/DeclSerialization.cpp new file mode 100644 index 00000000000..a7eaed5b2ef --- /dev/null +++ b/clang/lib/AST/DeclSerialization.cpp @@ -0,0 +1,463 @@ +//===--- DeclSerialization.cpp - Serialization of Decls ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines methods that implement bitcode serialization for Decls. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using llvm::Serializer; +using llvm::Deserializer; +using llvm::SerializedPtrID; + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Decl Serialization: Dispatch code to handle specialized decl types. +//===----------------------------------------------------------------------===// + +void Decl::Emit(Serializer& S) const { + S.EmitInt(getKind()); + EmitImpl(S); +} + +Decl* Decl::Create(Deserializer& D) { + + Kind k = static_cast<Kind>(D.ReadInt()); + + switch (k) { + default: + assert (false && "Not implemented."); + break; + + case BlockVar: + return BlockVarDecl::CreateImpl(D); + + case Enum: + return EnumDecl::CreateImpl(D); + + case EnumConstant: + return EnumConstantDecl::CreateImpl(D); + + case Field: + return FieldDecl::CreateImpl(D); + + case FileVar: + return FileVarDecl::CreateImpl(D); + + case ParmVar: + return ParmVarDecl::CreateImpl(D); + + case Function: + return FunctionDecl::CreateImpl(D); + + case Union: + case Struct: + return RecordDecl::CreateImpl(k,D); + + case Typedef: + return TypedefDecl::CreateImpl(D); + + case FileScopeAsm: + return FileScopeAsmDecl::CreateImpl(D); + } +} + +//===----------------------------------------------------------------------===// +// Common serialization logic for subclasses of Decl. +//===----------------------------------------------------------------------===// + +void Decl::EmitInRec(Serializer& S) const { + S.Emit(getLocation()); // From Decl. +} + +void Decl::ReadInRec(Deserializer& D) { + Loc = SourceLocation::ReadVal(D); // From Decl. +} + +//===----------------------------------------------------------------------===// +// Common serialization logic for subclasses of NamedDecl. 
+//===----------------------------------------------------------------------===//
+
+/// EmitInRec - Write Decl's fields followed by the identifier pointer.
+void NamedDecl::EmitInRec(Serializer& S) const {
+  Decl::EmitInRec(S);
+  S.EmitPtr(getIdentifier()); // From NamedDecl.
+}
+
+/// ReadInRec - Read the fields in the order NamedDecl::EmitInRec wrote them.
+void NamedDecl::ReadInRec(Deserializer& D) {
+  Decl::ReadInRec(D);
+  D.ReadPtr(Identifier); // From NamedDecl.
+}
+
+//===----------------------------------------------------------------------===//
+// Common serialization logic for subclasses of ScopedDecl.
+//===----------------------------------------------------------------------===//
+
+/// EmitInRec - Write NamedDecl's fields followed by the 'Next' pointer.
+void ScopedDecl::EmitInRec(Serializer& S) const {
+  NamedDecl::EmitInRec(S);
+  S.EmitPtr(getNext()); // From ScopedDecl.
+}
+
+/// ReadInRec - Read the fields in the order ScopedDecl::EmitInRec wrote them.
+void ScopedDecl::ReadInRec(Deserializer& D) {
+  NamedDecl::ReadInRec(D);
+  D.ReadPtr(Next); // From ScopedDecl.
+}
+
+  //===------------------------------------------------------------===//
+  // NOTE: Not all subclasses of ScopedDecl will use the "OutRec"     //
+  //  methods.  This is because owned pointers are usually "batched"  //
+  //  together for efficiency.                                        //
+  //===------------------------------------------------------------===//
+
+/// EmitOutRec - Write the owned next-declarator pointer.
+void ScopedDecl::EmitOutRec(Serializer& S) const {
+  S.EmitOwnedPtr(getNextDeclarator()); // From ScopedDecl.
+}
+
+/// ReadOutRec - Read the owned next-declarator pointer; it may be null.
+void ScopedDecl::ReadOutRec(Deserializer& D) {
+  NextDeclarator =
+    cast_or_null<ScopedDecl>(D.ReadOwnedPtr<Decl>()); // From ScopedDecl.
+}
+
+//===----------------------------------------------------------------------===//
+// Common serialization logic for subclasses of ValueDecl.
+//===----------------------------------------------------------------------===//
+
+/// EmitInRec - Write ScopedDecl's fields followed by the declared type.
+void ValueDecl::EmitInRec(Serializer& S) const {
+  ScopedDecl::EmitInRec(S);
+  S.Emit(getType()); // From ValueDecl.
+}
+
+/// ReadInRec - Read the fields in the order ValueDecl::EmitInRec wrote them.
+void ValueDecl::ReadInRec(Deserializer& D) {
+  ScopedDecl::ReadInRec(D);
+  DeclType = QualType::ReadVal(D); // From ValueDecl.
+}
+
+//===----------------------------------------------------------------------===//
+// Common serialization logic for subclasses of VarDecl.
+//===----------------------------------------------------------------------===//
+
+/// EmitInRec - Write ValueDecl's fields followed by the storage class.
+void VarDecl::EmitInRec(Serializer& S) const {
+  ValueDecl::EmitInRec(S);
+  S.EmitInt(getStorageClass()); // From VarDecl.
+}
+
+/// ReadInRec - Read the fields in the order VarDecl::EmitInRec wrote them.
+void VarDecl::ReadInRec(Deserializer& D) {
+  ValueDecl::ReadInRec(D);
+  SClass = static_cast<StorageClass>(D.ReadInt()); // From VarDecl.
+}
+
+  //===------------------------------------------------------------===//
+  // NOTE: VarDecl has its own "OutRec" methods that don't use        //
+  //  the ones defined in ScopedDecl.  This is to batch emit the      //
+  //  owned pointers, which results in a smaller output.              //
+  //===------------------------------------------------------------===//
+
+/// EmitOutRec - Batch-write the owned pointers: the initializer first, then
+/// the next declarator.
+void VarDecl::EmitOutRec(Serializer& S) const {
+  // Emit these last because they will create records of their own.
+  S.BatchEmitOwnedPtrs(getInit(), // From VarDecl.
+                       getNextDeclarator()); // From ScopedDecl.
+}
+
+/// ReadOutRec - Batch-read the owned pointers in the order
+/// VarDecl::EmitOutRec wrote them.
+void VarDecl::ReadOutRec(Deserializer& D) {
+  Decl* next_declarator;
+
+  D.BatchReadOwnedPtrs(Init, // From VarDecl.
+                       next_declarator); // From ScopedDecl.
+
+  setNextDeclarator(cast_or_null<ScopedDecl>(next_declarator));
+}
+
+
+/// EmitImpl - Write the complete VarDecl payload: in-record fields first,
+/// owned pointers second.
+void VarDecl::EmitImpl(Serializer& S) const {
+  VarDecl::EmitInRec(S);
+  VarDecl::EmitOutRec(S);
+}
+
+/// ReadImpl - Read the complete VarDecl payload in the order EmitImpl wrote
+/// it.
+void VarDecl::ReadImpl(Deserializer& D) {
+  ReadInRec(D);
+  ReadOutRec(D);
+}
+
+//===----------------------------------------------------------------------===//
+// BlockVarDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// CreateImpl - Build a placeholder BlockVarDecl and fill it in from the
+/// stream via VarDecl::ReadImpl.
+BlockVarDecl* BlockVarDecl::CreateImpl(Deserializer& D) {
+  BlockVarDecl* decl =
+    new BlockVarDecl(SourceLocation(),NULL,QualType(),None,NULL);
+
+  decl->VarDecl::ReadImpl(D);
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// FileVarDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// CreateImpl - Build a placeholder FileVarDecl and fill it in from the
+/// stream via VarDecl::ReadImpl.
+FileVarDecl* FileVarDecl::CreateImpl(Deserializer& D) {
+  FileVarDecl* decl =
+    new FileVarDecl(SourceLocation(),NULL,QualType(),None,NULL);
+
+  decl->VarDecl::ReadImpl(D);
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// ParmDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the VarDecl payload followed by the Objective-C
+/// declaration qualifier.
+void ParmVarDecl::EmitImpl(llvm::Serializer& S) const {
+  VarDecl::EmitImpl(S);
+  S.EmitInt(getObjCDeclQualifier()); // From ParmVarDecl.
+}
+
+/// CreateImpl - Build a placeholder ParmVarDecl and read its fields in the
+/// order ParmVarDecl::EmitImpl wrote them.
+ParmVarDecl* ParmVarDecl::CreateImpl(Deserializer& D) {
+  ParmVarDecl* decl =
+    new ParmVarDecl(SourceLocation(),NULL,QualType(),None,NULL);
+
+  decl->VarDecl::ReadImpl(D);
+  decl->objcDeclQualifier = static_cast<ObjCDeclQualifier>(D.ReadInt());
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// EnumDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the ScopedDecl fields, the definition flag and the
+/// integer type, then batch-write the owned element list and next declarator.
+void EnumDecl::EmitImpl(Serializer& S) const {
+  ScopedDecl::EmitInRec(S);
+  S.EmitBool(isDefinition());
+  S.Emit(IntegerType);
+  S.BatchEmitOwnedPtrs(ElementList,getNextDeclarator());
+}
+
+/// CreateImpl - Build a placeholder EnumDecl and read its fields in the order
+/// EnumDecl::EmitImpl wrote them.
+EnumDecl* EnumDecl::CreateImpl(Deserializer& D) {
+  EnumDecl* decl = new EnumDecl(SourceLocation(),NULL,NULL);
+
+  decl->ScopedDecl::ReadInRec(D);
+  decl->setDefinition(D.ReadBool());
+  decl->IntegerType = QualType::ReadVal(D);
+
+  // The owned pointers are read as plain Decl* and downcast afterwards;
+  // either may be null.
+  Decl* next_declarator;
+  Decl* Elist;
+
+  D.BatchReadOwnedPtrs(Elist,next_declarator);
+
+  decl->ElementList = cast_or_null<EnumConstantDecl>(Elist);
+  decl->setNextDeclarator(cast_or_null<ScopedDecl>(next_declarator));
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// EnumConstantDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the constant's value, the ValueDecl fields, and then the
+/// owned next-declarator and initializer pointers as one batch.
+void EnumConstantDecl::EmitImpl(Serializer& S) const {
+  S.Emit(Val);
+  ValueDecl::EmitInRec(S);
+  S.BatchEmitOwnedPtrs(getNextDeclarator(),Init);
+}
+
+/// CreateImpl - Read the value first (the constructor takes it), then build
+/// the decl and read the remaining fields in the order EmitImpl wrote them.
+EnumConstantDecl* EnumConstantDecl::CreateImpl(Deserializer& D) {
+  llvm::APSInt val(1);
+  D.Read(val);
+
+  EnumConstantDecl* decl =
+    new EnumConstantDecl(SourceLocation(),NULL,QualType(),NULL,
+                         val,NULL);
+
+  decl->ValueDecl::ReadInRec(D);
+
+  Decl* next_declarator;
+
+  D.BatchReadOwnedPtrs(next_declarator,decl->Init);
+
+  decl->setNextDeclarator(cast_or_null<ScopedDecl>(next_declarator));
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// FieldDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the field's type, the NamedDecl fields, and the owned
+/// bit-width expression.
+void FieldDecl::EmitImpl(Serializer& S) const {
+  S.Emit(getType());
+  NamedDecl::EmitInRec(S);
+  S.EmitOwnedPtr(BitWidth);
+}
+
+/// CreateImpl - Build a placeholder FieldDecl and read its fields in the
+/// order FieldDecl::EmitImpl wrote them.
+FieldDecl* FieldDecl::CreateImpl(Deserializer& D) {
+  FieldDecl* decl = new FieldDecl(SourceLocation(),NULL,QualType());
+  decl->DeclType.ReadBackpatch(D);
+  decl->ReadInRec(D);
+  decl->BitWidth = D.ReadOwnedPtr<Expr>();
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the storage class, inline flag, ValueDecl fields and
+/// DeclChain pointer, then a flag saying whether parameter decls follow, and
+/// finally the owned pointers (parameters if any, body, next declarator).
+void FunctionDecl::EmitImpl(Serializer& S) const {
+  S.EmitInt(SClass); // From FunctionDecl.
+  S.EmitBool(IsInline); // From FunctionDecl.
+  ValueDecl::EmitInRec(S);
+  S.EmitPtr(DeclChain);
+
+  // NOTE: We do not need to serialize out the number of parameters, because
+  //  that is encoded in the type (accessed via getNumParams()).
+
+  if (ParamInfo != NULL) {
+    S.EmitBool(true);
+    S.BatchEmitOwnedPtrs(getNumParams(),&ParamInfo[0], Body,
+                         getNextDeclarator());
+  }
+  else {
+    S.EmitBool(false);
+    S.BatchEmitOwnedPtrs(Body,getNextDeclarator());
+  }
+}
+
+/// CreateImpl - Read the storage class and inline flag (both constructor
+/// arguments), build the decl, then read the remaining fields in the order
+/// FunctionDecl::EmitImpl wrote them.
+FunctionDecl* FunctionDecl::CreateImpl(Deserializer& D) {
+  StorageClass SClass = static_cast<StorageClass>(D.ReadInt());
+  bool IsInline = D.ReadBool();
+
+  FunctionDecl* decl =
+    new FunctionDecl(SourceLocation(),NULL,QualType(),SClass, IsInline, 0);
+
+  decl->ValueDecl::ReadInRec(D);
+  D.ReadPtr(decl->DeclChain);
+
+  Decl* next_declarator;
+
+  bool hasParamDecls = D.ReadBool();
+
+  // getNumParams() is derived from the type read by ValueDecl::ReadInRec
+  // above, so the parameter array can be sized before its elements are read.
+  decl->ParamInfo = hasParamDecls
+                  ? new ParmVarDecl*[decl->getNumParams()]
+                  : NULL;
+
+  if (hasParamDecls)
+    D.BatchReadOwnedPtrs(decl->getNumParams(),
+                         reinterpret_cast<Decl**>(&decl->ParamInfo[0]),
+                         decl->Body, next_declarator);
+  else
+    D.BatchReadOwnedPtrs(decl->Body, next_declarator);
+
+  decl->setNextDeclarator(cast_or_null<ScopedDecl>(next_declarator));
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// RecordDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the ScopedDecl fields, the definition and
+/// flexible-array-member flags and the member count.  With members present,
+/// the members and the next declarator are written as one batch; otherwise
+/// only the next declarator is written.
+void RecordDecl::EmitImpl(Serializer& S) const {
+  ScopedDecl::EmitInRec(S);
+  S.EmitBool(isDefinition());
+  S.EmitBool(hasFlexibleArrayMember());
+  S.EmitSInt(getNumMembers());
+  if (getNumMembers() > 0) {
+    assert (Members);
+    S.BatchEmitOwnedPtrs((unsigned) getNumMembers(),
+                         (Decl**) &Members[0],getNextDeclarator());
+  }
+  else
+    ScopedDecl::EmitOutRec(S);
+}
+
+/// CreateImpl - Build a placeholder RecordDecl of kind DK and read its fields
+/// in the order RecordDecl::EmitImpl wrote them.
+RecordDecl* RecordDecl::CreateImpl(Decl::Kind DK, Deserializer& D) {
+  RecordDecl* decl = new RecordDecl(DK,SourceLocation(),NULL,NULL);
+
+  decl->ScopedDecl::ReadInRec(D);
+  decl->setDefinition(D.ReadBool());
+  decl->setHasFlexibleArrayMember(D.ReadBool());
+  decl->NumMembers = D.ReadSInt();
+
+  if (decl->getNumMembers() > 0) {
+    Decl* next_declarator;
+    decl->Members = new FieldDecl*[(unsigned) decl->getNumMembers()];
+
+    D.BatchReadOwnedPtrs((unsigned) decl->getNumMembers(),
+                         (Decl**) &decl->Members[0],
+                         next_declarator);
+
+    decl->setNextDeclarator(cast_or_null<ScopedDecl>(next_declarator));
+  }
+  else
+    decl->ScopedDecl::ReadOutRec(D);
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// TypedefDecl Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write the underlying type, then the ScopedDecl in-record and
+/// out-of-record fields.
+void TypedefDecl::EmitImpl(Serializer& S) const {
+  S.Emit(UnderlyingType);
+  ScopedDecl::EmitInRec(S);
+  ScopedDecl::EmitOutRec(S);
+}
+
+/// CreateImpl - Read the underlying type first (the constructor takes it),
+/// then build the decl and read the ScopedDecl fields.
+TypedefDecl* TypedefDecl::CreateImpl(Deserializer& D) {
+  QualType T = QualType::ReadVal(D);
+
+  TypedefDecl* decl = new TypedefDecl(SourceLocation(),NULL,T,NULL);
+
+  decl->ScopedDecl::ReadInRec(D);
+  decl->ScopedDecl::ReadOutRec(D);
+
+  return decl;
+}
+
+//===----------------------------------------------------------------------===//
+// LinkageSpec Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitInRec - Write Decl's fields, the language id, and the pointer to the
+/// wrapped declaration.
+void LinkageSpecDecl::EmitInRec(Serializer& S) const {
+  Decl::EmitInRec(S);
+  S.EmitInt(getLanguage());
+  S.EmitPtr(D);
+}
+
+/// ReadInRec - Read the fields in the order LinkageSpecDecl::EmitInRec wrote
+/// them.
+void LinkageSpecDecl::ReadInRec(Deserializer& D) {
+  Decl::ReadInRec(D);
+  Language = static_cast<LanguageIDs>(D.ReadInt());
+  // 'this->' is required: the parameter D hides the member of the same name.
+  D.ReadPtr(this->D);
+}
+
+//===----------------------------------------------------------------------===//
+// FileScopeAsm Serialization.
+//===----------------------------------------------------------------------===//
+
+/// EmitImpl - Write Decl's fields followed by the owned asm string.
+void FileScopeAsmDecl::EmitImpl(llvm::Serializer& S) const
+{
+  Decl::EmitInRec(S);
+  S.EmitOwnedPtr(AsmString);
+}
+
+/// CreateImpl - Build a placeholder FileScopeAsmDecl and read its fields in
+/// the order FileScopeAsmDecl::EmitImpl wrote them.
+FileScopeAsmDecl* FileScopeAsmDecl::CreateImpl(Deserializer& D) {
+  FileScopeAsmDecl* decl = new FileScopeAsmDecl(SourceLocation(), 0);
+
+  decl->Decl::ReadInRec(D);
+  // The owned pointer is read as an Expr and must be a StringLiteral.
+  decl->AsmString = cast<StringLiteral>(D.ReadOwnedPtr<Expr>());
+
+  return decl;
+}
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
new file mode 100644
index 00000000000..11fcc419a51
--- /dev/null
+++ b/clang/lib/AST/Expr.cpp
@@ -0,0 +1,1391 @@
+//===--- Expr.cpp - Expression AST Node Implementation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Expr class and subclasses.
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Expr.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/TargetInfo.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Primary Expressions. +//===----------------------------------------------------------------------===// + +StringLiteral::StringLiteral(const char *strData, unsigned byteLength, + bool Wide, QualType t, SourceLocation firstLoc, + SourceLocation lastLoc) : + Expr(StringLiteralClass, t) { + // OPTIMIZE: could allocate this appended to the StringLiteral. + char *AStrData = new char[byteLength]; + memcpy(AStrData, strData, byteLength); + StrData = AStrData; + ByteLength = byteLength; + IsWide = Wide; + firstTokLoc = firstLoc; + lastTokLoc = lastLoc; +} + +StringLiteral::~StringLiteral() { + delete[] StrData; +} + +bool UnaryOperator::isPostfix(Opcode Op) { + switch (Op) { + case PostInc: + case PostDec: + return true; + default: + return false; + } +} + +/// getOpcodeStr - Turn an Opcode enum value into the punctuation char it +/// corresponds to, e.g. "sizeof" or "[pre]++". +const char *UnaryOperator::getOpcodeStr(Opcode Op) { + switch (Op) { + default: assert(0 && "Unknown unary operator"); + case PostInc: return "++"; + case PostDec: return "--"; + case PreInc: return "++"; + case PreDec: return "--"; + case AddrOf: return "&"; + case Deref: return "*"; + case Plus: return "+"; + case Minus: return "-"; + case Not: return "~"; + case LNot: return "!"; + case Real: return "__real"; + case Imag: return "__imag"; + case SizeOf: return "sizeof"; + case AlignOf: return "alignof"; + case Extension: return "__extension__"; + case OffsetOf: return "__builtin_offsetof"; + } +} + +//===----------------------------------------------------------------------===// +// Postfix Operators. 
+//===----------------------------------------------------------------------===//
+
+
+/// CallExpr - Build a call expression.  SubExprs holds the callee in slot FN
+/// followed by copies of the numargs argument pointers starting at
+/// ARGS_START.
+CallExpr::CallExpr(Expr *fn, Expr **args, unsigned numargs, QualType t,
+                   SourceLocation rparenloc)
+  : Expr(CallExprClass, t), NumArgs(numargs) {
+  SubExprs = new Expr*[numargs+1];
+  SubExprs[FN] = fn;
+  for (unsigned i = 0; i != numargs; ++i)
+    SubExprs[i+ARGS_START] = args[i];
+  RParenLoc = rparenloc;
+}
+
+/// setNumArgs - This changes the number of arguments present in this call.
+/// Any orphaned expressions are deleted by this, and any new operands are set
+/// to null.
+void CallExpr::setNumArgs(unsigned NumArgs) {
+  // No change, just return.
+  if (NumArgs == getNumArgs()) return;
+
+  // If shrinking # arguments, just delete the extras and forget them.  The
+  // existing array is kept as-is.
+  if (NumArgs < getNumArgs()) {
+    for (unsigned i = NumArgs, e = getNumArgs(); i != e; ++i)
+      delete getArg(i);
+    this->NumArgs = NumArgs;
+    return;
+  }
+
+  // Otherwise, we are growing the # arguments.  Allocate a bigger argument
+  // array.
+  Expr **NewSubExprs = new Expr*[NumArgs+1];
+  // Copy over the callee and the existing args.
+  for (unsigned i = 0; i != getNumArgs()+ARGS_START; ++i)
+    NewSubExprs[i] = SubExprs[i];
+  // Null out new args.
+  for (unsigned i = getNumArgs()+ARGS_START; i != NumArgs+ARGS_START; ++i)
+    NewSubExprs[i] = 0;
+
+  delete[] SubExprs;
+  SubExprs = NewSubExprs;
+  this->NumArgs = NumArgs;
+}
+
+/// isBuiltinConstantExpr - Return true if this is a direct call to a builtin
+/// function whose result is a constant expression.  Only
+/// __builtin___CFStringMakeConstantString is recognized.
+bool CallExpr::isBuiltinConstantExpr() const {
+  // All simple function calls (e.g. func()) are implicitly cast to pointer to
+  // function. As a result, we try and obtain the DeclRefExpr from the
+  // ImplicitCastExpr.
+  const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(getCallee());
+  if (!ICE) // FIXME: deal with more complex calls (e.g. (func)(), (*func)()).
+    return false;
+
+  const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr());
+  if (!DRE)
+    return false;
+
+  const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(DRE->getDecl());
+  if (!FDecl)
+    return false;
+
+  unsigned builtinID = FDecl->getIdentifier()->getBuiltinID();
+  if (!builtinID)
+    return false;
+
+  // We have a builtin that is a constant expression
+  if (builtinID == Builtin::BI__builtin___CFStringMakeConstantString)
+    return true;
+  return false;
+}
+
+/// isBuiltinClassifyType - If this is a direct call to
+/// __builtin_classify_type, store the gcc type class of the first argument
+/// (or no_type_class when there is no argument) in Result and return true.
+/// Return false for any other call.  Result is marked signed in either case.
+bool CallExpr::isBuiltinClassifyType(llvm::APSInt &Result) const {
+  // The following enum mimics gcc's internal "typeclass.h" file.
+  enum gcc_type_class {
+    no_type_class = -1,
+    void_type_class, integer_type_class, char_type_class,
+    enumeral_type_class, boolean_type_class,
+    pointer_type_class, reference_type_class, offset_type_class,
+    real_type_class, complex_type_class,
+    function_type_class, method_type_class,
+    record_type_class, union_type_class,
+    array_type_class, string_type_class,
+    lang_type_class
+  };
+  Result.setIsSigned(true);
+
+  // All simple function calls (e.g. func()) are implicitly cast to pointer to
+  // function. As a result, we try and obtain the DeclRefExpr from the
+  // ImplicitCastExpr.
+  const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(getCallee());
+  if (!ICE) // FIXME: deal with more complex calls (e.g. (func)(), (*func)()).
+    return false;
+  const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr());
+  if (!DRE)
+    return false;
+
+  // We have a DeclRefExpr.
+  if (strcmp(DRE->getDecl()->getName(), "__builtin_classify_type") == 0) {
+    // If no argument was supplied, default to "no_type_class". This isn't
+    // ideal, however it's what gcc does.
+    Result = static_cast<uint64_t>(no_type_class);
+    if (NumArgs >= 1) {
+      QualType argType = getArg(0)->getType();
+
+      // The order of these tests matters: the first matching predicate wins.
+      if (argType->isVoidType())
+        Result = void_type_class;
+      else if (argType->isEnumeralType())
+        Result = enumeral_type_class;
+      else if (argType->isBooleanType())
+        Result = boolean_type_class;
+      else if (argType->isCharType())
+        Result = string_type_class; // gcc doesn't appear to use char_type_class
+      else if (argType->isIntegerType())
+        Result = integer_type_class;
+      else if (argType->isPointerType())
+        Result = pointer_type_class;
+      else if (argType->isReferenceType())
+        Result = reference_type_class;
+      else if (argType->isRealType())
+        Result = real_type_class;
+      else if (argType->isComplexType())
+        Result = complex_type_class;
+      else if (argType->isFunctionType())
+        Result = function_type_class;
+      else if (argType->isStructureType())
+        Result = record_type_class;
+      else if (argType->isUnionType())
+        Result = union_type_class;
+      else if (argType->isArrayType())
+        Result = array_type_class;
+      else  // FIXME: offset_type_class, method_type_class, & lang_type_class?
+        assert(0 && "CallExpr::isBuiltinClassifyType(): unimplemented type");
+    }
+    return true;
+  }
+  return false;
+}
+
+/// getOpcodeStr - Turn an Opcode enum value into the punctuation char it
+/// corresponds to, e.g. "<<=".
+const char *BinaryOperator::getOpcodeStr(Opcode Op) {
+  switch (Op) {
+  // When asserts are compiled out, an unknown opcode falls through to the
+  // first case below.
+  default: assert(0 && "Unknown binary operator");
+  case Mul: return "*";
+  case Div: return "/";
+  case Rem: return "%";
+  case Add: return "+";
+  case Sub: return "-";
+  case Shl: return "<<";
+  case Shr: return ">>";
+  case LT: return "<";
+  case GT: return ">";
+  case LE: return "<=";
+  case GE: return ">=";
+  case EQ: return "==";
+  case NE: return "!=";
+  case And: return "&";
+  case Xor: return "^";
+  case Or: return "|";
+  case LAnd: return "&&";
+  case LOr: return "||";
+  case Assign: return "=";
+  case MulAssign: return "*=";
+  case DivAssign: return "/=";
+  case RemAssign: return "%=";
+  case AddAssign: return "+=";
+  case SubAssign: return "-=";
+  case ShlAssign: return "<<=";
+  case ShrAssign: return ">>=";
+  case AndAssign: return "&=";
+  case XorAssign: return "^=";
+  case OrAssign: return "|=";
+  case Comma: return ",";
+  }
+}
+
+/// InitListExpr - Build an initializer list with a null type, copying the
+/// numinits expression pointers from initexprs into a newly allocated
+/// InitExprs array.
+InitListExpr::InitListExpr(SourceLocation lbraceloc,
+                           Expr **initexprs, unsigned numinits,
+                           SourceLocation rbraceloc)
+  : Expr(InitListExprClass, QualType())
+  , NumInits(numinits)
+  , LBraceLoc(lbraceloc)
+  , RBraceLoc(rbraceloc)
+{
+  InitExprs = new Expr*[numinits];
+  for (unsigned i = 0; i != numinits; i++)
+    InitExprs[i] = initexprs[i];
+}
+
+//===----------------------------------------------------------------------===//
+// Generic Expression Routines
+//===----------------------------------------------------------------------===//
+
+/// hasLocalSideEffect - Return true if this immediate expression has side
+/// effects, not counting any sub-expressions.
+bool Expr::hasLocalSideEffect() const { + switch (getStmtClass()) { + default: + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()->hasLocalSideEffect(); + case UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(this); + + switch (UO->getOpcode()) { + default: return false; + case UnaryOperator::PostInc: + case UnaryOperator::PostDec: + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + return true; // ++/-- + + case UnaryOperator::Deref: + // Dereferencing a volatile pointer is a side-effect. + return getType().isVolatileQualified(); + case UnaryOperator::Real: + case UnaryOperator::Imag: + // accessing a piece of a volatile complex is a side-effect. + return UO->getSubExpr()->getType().isVolatileQualified(); + + case UnaryOperator::Extension: + return UO->getSubExpr()->hasLocalSideEffect(); + } + } + case BinaryOperatorClass: { + const BinaryOperator *BinOp = cast<BinaryOperator>(this); + // Consider comma to have side effects if the LHS and RHS both do. + if (BinOp->getOpcode() == BinaryOperator::Comma) + return BinOp->getLHS()->hasLocalSideEffect() && + BinOp->getRHS()->hasLocalSideEffect(); + + return BinOp->isAssignmentOp(); + } + case CompoundAssignOperatorClass: + return true; + + case ConditionalOperatorClass: { + const ConditionalOperator *Exp = cast<ConditionalOperator>(this); + return Exp->getCond()->hasLocalSideEffect() + || (Exp->getLHS() && Exp->getLHS()->hasLocalSideEffect()) + || (Exp->getRHS() && Exp->getRHS()->hasLocalSideEffect()); + } + + case MemberExprClass: + case ArraySubscriptExprClass: + // If the base pointer or element is to a volatile pointer/field, accessing + // if is a side effect. + return getType().isVolatileQualified(); + + case CallExprClass: + // TODO: check attributes for pure/const. "void foo() { strlen("bar"); }" + // should warn. + return true; + case ObjCMessageExprClass: + return true; + + case CastExprClass: + // If this is a cast to void, check the operand. 
Otherwise, the result of + // the cast is unused. + if (getType()->isVoidType()) + return cast<CastExpr>(this)->getSubExpr()->hasLocalSideEffect(); + return false; + } +} + +/// isLvalue - C99 6.3.2.1: an lvalue is an expression with an object type or an +/// incomplete type other than void. Nonarray expressions that can be lvalues: +/// - name, where name must be a variable +/// - e[i] +/// - (e), where e must be an lvalue +/// - e.name, where e must be an lvalue +/// - e->name +/// - *e, the type of e cannot be a function type +/// - string-constant +/// - (__real__ e) and (__imag__ e) where e is an lvalue [GNU extension] +/// - reference type [C++ [expr]] +/// +Expr::isLvalueResult Expr::isLvalue() const { + // first, check the type (C99 6.3.2.1) + if (TR->isFunctionType()) // from isObjectType() + return LV_NotObjectType; + + // Allow qualified void which is an incomplete type other than void (yuck). + if (TR->isVoidType() && !TR.getCanonicalType().getCVRQualifiers()) + return LV_IncompleteVoidType; + + if (TR->isReferenceType()) // C++ [expr] + return LV_Valid; + + // the type looks fine, now check the expression + switch (getStmtClass()) { + case StringLiteralClass: // C99 6.5.1p4 + return LV_Valid; + case ArraySubscriptExprClass: // C99 6.5.3p4 (e1[e2] == (*((e1)+(e2)))) + // For vectors, make sure base is an lvalue (i.e. not a function call). + if (cast<ArraySubscriptExpr>(this)->getBase()->getType()->isVectorType()) + return cast<ArraySubscriptExpr>(this)->getBase()->isLvalue(); + return LV_Valid; + case DeclRefExprClass: // C99 6.5.1p2 + if (isa<VarDecl>(cast<DeclRefExpr>(this)->getDecl())) + return LV_Valid; + break; + case MemberExprClass: { // C99 6.5.2.3p4 + const MemberExpr *m = cast<MemberExpr>(this); + return m->isArrow() ? 
LV_Valid : m->getBase()->isLvalue(); + } + case UnaryOperatorClass: + if (cast<UnaryOperator>(this)->getOpcode() == UnaryOperator::Deref) + return LV_Valid; // C99 6.5.3p4 + + if (cast<UnaryOperator>(this)->getOpcode() == UnaryOperator::Real || + cast<UnaryOperator>(this)->getOpcode() == UnaryOperator::Imag) + return cast<UnaryOperator>(this)->getSubExpr()->isLvalue(); // GNU. + break; + case ParenExprClass: // C99 6.5.1p5 + return cast<ParenExpr>(this)->getSubExpr()->isLvalue(); + case CompoundLiteralExprClass: // C99 6.5.2.5p5 + return LV_Valid; + case OCUVectorElementExprClass: + if (cast<OCUVectorElementExpr>(this)->containsDuplicateElements()) + return LV_DuplicateVectorComponents; + return LV_Valid; + case ObjCIvarRefExprClass: // ObjC instance variables are lvalues. + return LV_Valid; + case PreDefinedExprClass: + return LV_Valid; + default: + break; + } + return LV_InvalidExpression; +} + +/// isModifiableLvalue - C99 6.3.2.1: an lvalue that does not have array type, +/// does not have an incomplete type, does not have a const-qualified type, and +/// if it is a structure or union, does not have any member (including, +/// recursively, any member or element of all contained aggregates or unions) +/// with a const-qualified type. 
+Expr::isModifiableLvalueResult Expr::isModifiableLvalue() const { + isLvalueResult lvalResult = isLvalue(); + + switch (lvalResult) { + case LV_Valid: break; + case LV_NotObjectType: return MLV_NotObjectType; + case LV_IncompleteVoidType: return MLV_IncompleteVoidType; + case LV_DuplicateVectorComponents: return MLV_DuplicateVectorComponents; + case LV_InvalidExpression: return MLV_InvalidExpression; + } + if (TR.isConstQualified()) + return MLV_ConstQualified; + if (TR->isArrayType()) + return MLV_ArrayType; + if (TR->isIncompleteType()) + return MLV_IncompleteType; + + if (const RecordType *r = dyn_cast<RecordType>(TR.getCanonicalType())) { + if (r->hasConstFields()) + return MLV_ConstQualified; + } + return MLV_Valid; +} + +/// hasGlobalStorage - Return true if this expression has static storage +/// duration. This means that the address of this expression is a link-time +/// constant. +bool Expr::hasGlobalStorage() const { + switch (getStmtClass()) { + default: + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()->hasGlobalStorage(); + case ImplicitCastExprClass: + return cast<ImplicitCastExpr>(this)->getSubExpr()->hasGlobalStorage(); + case CompoundLiteralExprClass: + return cast<CompoundLiteralExpr>(this)->isFileScope(); + case DeclRefExprClass: { + const Decl *D = cast<DeclRefExpr>(this)->getDecl(); + if (const VarDecl *VD = dyn_cast<VarDecl>(D)) + return VD->hasGlobalStorage(); + return false; + } + case MemberExprClass: { + const MemberExpr *M = cast<MemberExpr>(this); + return !M->isArrow() && M->getBase()->hasGlobalStorage(); + } + case ArraySubscriptExprClass: + return cast<ArraySubscriptExpr>(this)->getBase()->hasGlobalStorage(); + case PreDefinedExprClass: + return true; + } +} + +Expr* Expr::IgnoreParens() { + Expr* E = this; + while (ParenExpr* P = dyn_cast<ParenExpr>(E)) + E = P->getSubExpr(); + + return E; +} + +/// IgnoreParenCasts - Ignore parentheses and casts. 
Strip off any ParenExpr +/// or CastExprs or ImplicitCastExprs, returning their operand. +Expr *Expr::IgnoreParenCasts() { + Expr *E = this; + while (true) { + if (ParenExpr *P = dyn_cast<ParenExpr>(E)) + E = P->getSubExpr(); + else if (CastExpr *P = dyn_cast<CastExpr>(E)) + E = P->getSubExpr(); + else if (ImplicitCastExpr *P = dyn_cast<ImplicitCastExpr>(E)) + E = P->getSubExpr(); + else + return E; + } +} + + +bool Expr::isConstantExpr(ASTContext &Ctx, SourceLocation *Loc) const { + switch (getStmtClass()) { + default: + if (Loc) *Loc = getLocStart(); + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()->isConstantExpr(Ctx, Loc); + case StringLiteralClass: + case ObjCStringLiteralClass: + case FloatingLiteralClass: + case IntegerLiteralClass: + case CharacterLiteralClass: + case ImaginaryLiteralClass: + case TypesCompatibleExprClass: + case CXXBoolLiteralExprClass: + return true; + case CallExprClass: { + const CallExpr *CE = cast<CallExpr>(this); + llvm::APSInt Result(32); + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + if (CE->isBuiltinClassifyType(Result)) + return true; + if (CE->isBuiltinConstantExpr()) + return true; + if (Loc) *Loc = getLocStart(); + return false; + } + case DeclRefExprClass: { + const Decl *D = cast<DeclRefExpr>(this)->getDecl(); + // Accept address of function. + if (isa<EnumConstantDecl>(D) || isa<FunctionDecl>(D)) + return true; + if (Loc) *Loc = getLocStart(); + if (isa<VarDecl>(D)) + return TR->isArrayType(); + return false; + } + case CompoundLiteralExprClass: + if (Loc) *Loc = getLocStart(); + // Allow "(int []){2,4}", since the array will be converted to a pointer. + // Allow "(vector type){2,4}" since the elements are all constant. 
+ return TR->isArrayType() || TR->isVectorType(); + case UnaryOperatorClass: { + const UnaryOperator *Exp = cast<UnaryOperator>(this); + + // C99 6.6p9 + if (Exp->getOpcode() == UnaryOperator::AddrOf) { + if (!Exp->getSubExpr()->hasGlobalStorage()) { + if (Loc) *Loc = getLocStart(); + return false; + } + return true; + } + + // Get the operand value. If this is sizeof/alignof, do not evalute the + // operand. This affects C99 6.6p3. + if (!Exp->isSizeOfAlignOfOp() && + Exp->getOpcode() != UnaryOperator::OffsetOf && + !Exp->getSubExpr()->isConstantExpr(Ctx, Loc)) + return false; + + switch (Exp->getOpcode()) { + // Address, indirect, pre/post inc/dec, etc are not valid constant exprs. + // See C99 6.6p3. + default: + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + case UnaryOperator::Extension: + return true; // FIXME: this is wrong. + case UnaryOperator::SizeOf: + case UnaryOperator::AlignOf: + case UnaryOperator::OffsetOf: + // sizeof(vla) is not a constantexpr: C99 6.5.3.4p2. + if (!Exp->getSubExpr()->getType()->isConstantSizeType()) { + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + } + return true; + case UnaryOperator::LNot: + case UnaryOperator::Plus: + case UnaryOperator::Minus: + case UnaryOperator::Not: + return true; + } + } + case SizeOfAlignOfTypeExprClass: { + const SizeOfAlignOfTypeExpr *Exp = cast<SizeOfAlignOfTypeExpr>(this); + // alignof always evaluates to a constant. + if (Exp->isSizeOf() && !Exp->getArgumentType()->isVoidType() && + !Exp->getArgumentType()->isConstantSizeType()) { + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + } + return true; + } + case BinaryOperatorClass: { + const BinaryOperator *Exp = cast<BinaryOperator>(this); + + // The LHS of a constant expr is always evaluated and needed. 
+ if (!Exp->getLHS()->isConstantExpr(Ctx, Loc)) + return false; + + if (!Exp->getRHS()->isConstantExpr(Ctx, Loc)) + return false; + return true; + } + case ImplicitCastExprClass: + case CastExprClass: { + const Expr *SubExpr; + SourceLocation CastLoc; + if (const CastExpr *C = dyn_cast<CastExpr>(this)) { + SubExpr = C->getSubExpr(); + CastLoc = C->getLParenLoc(); + } else { + SubExpr = cast<ImplicitCastExpr>(this)->getSubExpr(); + CastLoc = getLocStart(); + } + if (!SubExpr->isConstantExpr(Ctx, Loc)) { + if (Loc) *Loc = SubExpr->getLocStart(); + return false; + } + return true; + } + case ConditionalOperatorClass: { + const ConditionalOperator *Exp = cast<ConditionalOperator>(this); + if (!Exp->getCond()->isConstantExpr(Ctx, Loc) || + // Handle the GNU extension for missing LHS. + !(Exp->getLHS() && Exp->getLHS()->isConstantExpr(Ctx, Loc)) || + !Exp->getRHS()->isConstantExpr(Ctx, Loc)) + return false; + return true; + } + case InitListExprClass: { + const InitListExpr *Exp = cast<InitListExpr>(this); + unsigned numInits = Exp->getNumInits(); + for (unsigned i = 0; i < numInits; i++) { + if (!Exp->getInit(i)->isConstantExpr(Ctx, Loc)) { + if (Loc) *Loc = Exp->getInit(i)->getLocStart(); + return false; + } + } + return true; + } + } +} + +/// isIntegerConstantExpr - this recursive routine will test if an expression is +/// an integer constant expression. Note: With the introduction of VLA's in +/// C99 the result of the sizeof operator is no longer always a constant +/// expression. The generalization of the wording to include any subexpression +/// that is not evaluated (C99 6.6p3) means that nonconstant subexpressions +/// can appear as operands to other operators (e.g. &&, ||, ?:). For instance, +/// "0 || f()" can be treated as a constant expression. In C90 this expression, +/// occurring in a context requiring a constant, would have been a constraint +/// violation. FIXME: This routine currently implements C90 semantics. 
+/// To properly implement C99 semantics this routine will need to evaluate +/// expressions involving operators previously mentioned. + +/// FIXME: Pass up a reason why! Invalid operation in i-c-e, division by zero, +/// comma, etc +/// +/// FIXME: This should ext-warn on overflow during evaluation! ISO C does not +/// permit this. This includes things like (int)1e1000 +/// +/// FIXME: Handle offsetof. Two things to do: Handle GCC's __builtin_offsetof +/// to support gcc 4.0+ and handle the idiom GCC recognizes with a null pointer +/// cast+dereference. +bool Expr::isIntegerConstantExpr(llvm::APSInt &Result, ASTContext &Ctx, + SourceLocation *Loc, bool isEvaluated) const { + switch (getStmtClass()) { + default: + if (Loc) *Loc = getLocStart(); + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()-> + isIntegerConstantExpr(Result, Ctx, Loc, isEvaluated); + case IntegerLiteralClass: + Result = cast<IntegerLiteral>(this)->getValue(); + break; + case CharacterLiteralClass: { + const CharacterLiteral *CL = cast<CharacterLiteral>(this); + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + Result = CL->getValue(); + Result.setIsUnsigned(!getType()->isSignedIntegerType()); + break; + } + case TypesCompatibleExprClass: { + const TypesCompatibleExpr *TCE = cast<TypesCompatibleExpr>(this); + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + Result = Ctx.typesAreCompatible(TCE->getArgType1(), TCE->getArgType2()); + break; + } + case CallExprClass: { + const CallExpr *CE = cast<CallExpr>(this); + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + if (CE->isBuiltinClassifyType(Result)) + break; + if (Loc) *Loc = getLocStart(); + return false; + } + case DeclRefExprClass: + if (const EnumConstantDecl *D = + dyn_cast<EnumConstantDecl>(cast<DeclRefExpr>(this)->getDecl())) { + Result = D->getInitVal(); + break; + } + if (Loc) *Loc = getLocStart(); + return false; + case 
UnaryOperatorClass: { + const UnaryOperator *Exp = cast<UnaryOperator>(this); + + // Get the operand value. If this is sizeof/alignof, do not evalute the + // operand. This affects C99 6.6p3. + if (!Exp->isSizeOfAlignOfOp() && !Exp->isOffsetOfOp() && + !Exp->getSubExpr()->isIntegerConstantExpr(Result, Ctx, Loc,isEvaluated)) + return false; + + switch (Exp->getOpcode()) { + // Address, indirect, pre/post inc/dec, etc are not valid constant exprs. + // See C99 6.6p3. + default: + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + case UnaryOperator::Extension: + return true; // FIXME: this is wrong. + case UnaryOperator::SizeOf: + case UnaryOperator::AlignOf: + // Return the result in the right width. + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + + // sizeof(void) and __alignof__(void) = 1 as a gcc extension. + if (Exp->getSubExpr()->getType()->isVoidType()) { + Result = 1; + break; + } + + // sizeof(vla) is not a constantexpr: C99 6.5.3.4p2. + if (!Exp->getSubExpr()->getType()->isConstantSizeType()) { + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + } + + // Get information about the size or align. + if (Exp->getSubExpr()->getType()->isFunctionType()) { + // GCC extension: sizeof(function) = 1. + Result = Exp->getOpcode() == UnaryOperator::AlignOf ? 
4 : 1; + } else { + unsigned CharSize = Ctx.Target.getCharWidth(); + if (Exp->getOpcode() == UnaryOperator::AlignOf) + Result = Ctx.getTypeAlign(Exp->getSubExpr()->getType()) / CharSize; + else + Result = Ctx.getTypeSize(Exp->getSubExpr()->getType()) / CharSize; + } + break; + case UnaryOperator::LNot: { + bool Val = Result == 0; + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + Result = Val; + break; + } + case UnaryOperator::Plus: + break; + case UnaryOperator::Minus: + Result = -Result; + break; + case UnaryOperator::Not: + Result = ~Result; + break; + case UnaryOperator::OffsetOf: + Result = Exp->evaluateOffsetOf(Ctx); + } + break; + } + case SizeOfAlignOfTypeExprClass: { + const SizeOfAlignOfTypeExpr *Exp = cast<SizeOfAlignOfTypeExpr>(this); + + // Return the result in the right width. + Result.zextOrTrunc(static_cast<uint32_t>(Ctx.getTypeSize(getType()))); + + // sizeof(void) and __alignof__(void) = 1 as a gcc extension. + if (Exp->getArgumentType()->isVoidType()) { + Result = 1; + break; + } + + // alignof always evaluates to a constant, sizeof does if arg is not VLA. + if (Exp->isSizeOf() && !Exp->getArgumentType()->isConstantSizeType()) { + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + } + + // Get information about the size or align. + if (Exp->getArgumentType()->isFunctionType()) { + // GCC extension: sizeof(function) = 1. + Result = Exp->isSizeOf() ? 1 : 4; + } else { + unsigned CharSize = Ctx.Target.getCharWidth(); + if (Exp->isSizeOf()) + Result = Ctx.getTypeSize(Exp->getArgumentType()) / CharSize; + else + Result = Ctx.getTypeAlign(Exp->getArgumentType()) / CharSize; + } + break; + } + case BinaryOperatorClass: { + const BinaryOperator *Exp = cast<BinaryOperator>(this); + + // The LHS of a constant expr is always evaluated and needed. 
+ if (!Exp->getLHS()->isIntegerConstantExpr(Result, Ctx, Loc, isEvaluated)) + return false; + + llvm::APSInt RHS(Result); + + // The short-circuiting &&/|| operators don't necessarily evaluate their + // RHS. Make sure to pass isEvaluated down correctly. + if (Exp->isLogicalOp()) { + bool RHSEval; + if (Exp->getOpcode() == BinaryOperator::LAnd) + RHSEval = Result != 0; + else { + assert(Exp->getOpcode() == BinaryOperator::LOr &&"Unexpected logical"); + RHSEval = Result == 0; + } + + if (!Exp->getRHS()->isIntegerConstantExpr(RHS, Ctx, Loc, + isEvaluated & RHSEval)) + return false; + } else { + if (!Exp->getRHS()->isIntegerConstantExpr(RHS, Ctx, Loc, isEvaluated)) + return false; + } + + switch (Exp->getOpcode()) { + default: + if (Loc) *Loc = getLocStart(); + return false; + case BinaryOperator::Mul: + Result *= RHS; + break; + case BinaryOperator::Div: + if (RHS == 0) { + if (!isEvaluated) break; + if (Loc) *Loc = getLocStart(); + return false; + } + Result /= RHS; + break; + case BinaryOperator::Rem: + if (RHS == 0) { + if (!isEvaluated) break; + if (Loc) *Loc = getLocStart(); + return false; + } + Result %= RHS; + break; + case BinaryOperator::Add: Result += RHS; break; + case BinaryOperator::Sub: Result -= RHS; break; + case BinaryOperator::Shl: + Result <<= + static_cast<uint32_t>(RHS.getLimitedValue(Result.getBitWidth()-1)); + break; + case BinaryOperator::Shr: + Result >>= + static_cast<uint32_t>(RHS.getLimitedValue(Result.getBitWidth()-1)); + break; + case BinaryOperator::LT: Result = Result < RHS; break; + case BinaryOperator::GT: Result = Result > RHS; break; + case BinaryOperator::LE: Result = Result <= RHS; break; + case BinaryOperator::GE: Result = Result >= RHS; break; + case BinaryOperator::EQ: Result = Result == RHS; break; + case BinaryOperator::NE: Result = Result != RHS; break; + case BinaryOperator::And: Result &= RHS; break; + case BinaryOperator::Xor: Result ^= RHS; break; + case BinaryOperator::Or: Result |= RHS; break; + case 
BinaryOperator::LAnd: + Result = Result != 0 && RHS != 0; + break; + case BinaryOperator::LOr: + Result = Result != 0 || RHS != 0; + break; + + case BinaryOperator::Comma: + // C99 6.6p3: "shall not contain assignment, ..., or comma operators, + // *except* when they are contained within a subexpression that is not + // evaluated". Note that Assignment can never happen due to constraints + // on the LHS subexpr, so we don't need to check it here. + if (isEvaluated) { + if (Loc) *Loc = getLocStart(); + return false; + } + + // The result of the constant expr is the RHS. + Result = RHS; + return true; + } + + assert(!Exp->isAssignmentOp() && "LHS can't be a constant expr!"); + break; + } + case ImplicitCastExprClass: + case CastExprClass: { + const Expr *SubExpr; + SourceLocation CastLoc; + if (const CastExpr *C = dyn_cast<CastExpr>(this)) { + SubExpr = C->getSubExpr(); + CastLoc = C->getLParenLoc(); + } else { + SubExpr = cast<ImplicitCastExpr>(this)->getSubExpr(); + CastLoc = getLocStart(); + } + + // C99 6.6p6: shall only convert arithmetic types to integer types. + if (!SubExpr->getType()->isArithmeticType() || + !getType()->isIntegerType()) { + if (Loc) *Loc = SubExpr->getLocStart(); + return false; + } + + uint32_t DestWidth = static_cast<uint32_t>(Ctx.getTypeSize(getType())); + + // Handle simple integer->integer casts. + if (SubExpr->getType()->isIntegerType()) { + if (!SubExpr->isIntegerConstantExpr(Result, Ctx, Loc, isEvaluated)) + return false; + + // Figure out if this is a truncate, extend or noop cast. + // If the input is signed, do a sign extend, noop, or truncate. + if (getType()->isBooleanType()) { + // Conversion to bool compares against zero. + Result = Result != 0; + Result.zextOrTrunc(DestWidth); + } else if (SubExpr->getType()->isSignedIntegerType()) + Result.sextOrTrunc(DestWidth); + else // If the input is unsigned, do a zero extend, noop, or truncate. 
+ Result.zextOrTrunc(DestWidth); + break; + } + + // Allow floating constants that are the immediate operands of casts or that + // are parenthesized. + const Expr *Operand = SubExpr; + while (const ParenExpr *PE = dyn_cast<ParenExpr>(Operand)) + Operand = PE->getSubExpr(); + + // If this isn't a floating literal, we can't handle it. + const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(Operand); + if (!FL) { + if (Loc) *Loc = Operand->getLocStart(); + return false; + } + + // If the destination is boolean, compare against zero. + if (getType()->isBooleanType()) { + Result = !FL->getValue().isZero(); + Result.zextOrTrunc(DestWidth); + break; + } + + // Determine whether we are converting to unsigned or signed. + bool DestSigned = getType()->isSignedIntegerType(); + + // TODO: Warn on overflow, but probably not here: isIntegerConstantExpr can + // be called multiple times per AST. + uint64_t Space[4]; + (void)FL->getValue().convertToInteger(Space, DestWidth, DestSigned, + llvm::APFloat::rmTowardZero); + Result = llvm::APInt(DestWidth, 4, Space); + break; + } + case ConditionalOperatorClass: { + const ConditionalOperator *Exp = cast<ConditionalOperator>(this); + + if (!Exp->getCond()->isIntegerConstantExpr(Result, Ctx, Loc, isEvaluated)) + return false; + + const Expr *TrueExp = Exp->getLHS(); + const Expr *FalseExp = Exp->getRHS(); + if (Result == 0) std::swap(TrueExp, FalseExp); + + // Evaluate the false one first, discard the result. + if (FalseExp && !FalseExp->isIntegerConstantExpr(Result, Ctx, Loc, false)) + return false; + // Evalute the true one, capture the result. + if (TrueExp && + !TrueExp->isIntegerConstantExpr(Result, Ctx, Loc, isEvaluated)) + return false; + break; + } + } + + // Cases that are valid constant exprs fall through to here. 
+ Result.setIsUnsigned(getType()->isUnsignedIntegerType()); + return true; +} + +/// isNullPointerConstant - C99 6.3.2.3p3 - Return true if this is either an +/// integer constant expression with the value zero, or if this is one that is +/// cast to void*. +bool Expr::isNullPointerConstant(ASTContext &Ctx) const { + // Strip off a cast to void*, if it exists. + if (const CastExpr *CE = dyn_cast<CastExpr>(this)) { + // Check that it is a cast to void*. + if (const PointerType *PT = CE->getType()->getAsPointerType()) { + QualType Pointee = PT->getPointeeType(); + if (Pointee.getCVRQualifiers() == 0 && + Pointee->isVoidType() && // to void* + CE->getSubExpr()->getType()->isIntegerType()) // from int. + return CE->getSubExpr()->isNullPointerConstant(Ctx); + } + } else if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(this)) { + // Ignore the ImplicitCastExpr type entirely. + return ICE->getSubExpr()->isNullPointerConstant(Ctx); + } else if (const ParenExpr *PE = dyn_cast<ParenExpr>(this)) { + // Accept ((void*)0) as a null pointer constant, as many other + // implementations do. + return PE->getSubExpr()->isNullPointerConstant(Ctx); + } + + // This expression must be an integer type. + if (!getType()->isIntegerType()) + return false; + + // If we have an integer constant expression, we need to *evaluate* it and + // test for the value 0. + llvm::APSInt Val(32); + return isIntegerConstantExpr(Val, Ctx, 0, true) && Val == 0; +} + +unsigned OCUVectorElementExpr::getNumElements() const { + return strlen(Accessor.getName()); +} + + +/// getComponentType - Determine whether the components of this access are +/// "point" "color" or "texture" elements. +OCUVectorElementExpr::ElementType +OCUVectorElementExpr::getElementType() const { + // derive the component type, no need to waste space. 
+ const char *compStr = Accessor.getName(); + + if (OCUVectorType::getPointAccessorIdx(*compStr) != -1) return Point; + if (OCUVectorType::getColorAccessorIdx(*compStr) != -1) return Color; + + assert(OCUVectorType::getTextureAccessorIdx(*compStr) != -1 && + "getComponentType(): Illegal accessor"); + return Texture; +} + +/// containsDuplicateElements - Return true if any element access is +/// repeated. +bool OCUVectorElementExpr::containsDuplicateElements() const { + const char *compStr = Accessor.getName(); + unsigned length = strlen(compStr); + + for (unsigned i = 0; i < length-1; i++) { + const char *s = compStr+i; + for (const char c = *s++; *s; s++) + if (c == *s) + return true; + } + return false; +} + +/// getEncodedElementAccess - We encode fields with two bits per component. +unsigned OCUVectorElementExpr::getEncodedElementAccess() const { + const char *compStr = Accessor.getName(); + unsigned length = getNumElements(); + + unsigned Result = 0; + + while (length--) { + Result <<= 2; + int Idx = OCUVectorType::getAccessorIdx(compStr[length]); + assert(Idx != -1 && "Invalid accessor letter"); + Result |= Idx; + } + return Result; +} + +// constructor for instance messages. +ObjCMessageExpr::ObjCMessageExpr(Expr *receiver, Selector selInfo, + QualType retType, ObjCMethodDecl *mproto, + SourceLocation LBrac, SourceLocation RBrac, + Expr **ArgExprs, unsigned nargs) + : Expr(ObjCMessageExprClass, retType), SelName(selInfo), + MethodProto(mproto), ClassName(0) { + NumArgs = nargs; + SubExprs = new Expr*[NumArgs+1]; + SubExprs[RECEIVER] = receiver; + if (NumArgs) { + for (unsigned i = 0; i != NumArgs; ++i) + SubExprs[i+ARGS_START] = static_cast<Expr *>(ArgExprs[i]); + } + LBracloc = LBrac; + RBracloc = RBrac; +} + +// constructor for class messages. 
+// FIXME: clsName should be typed to ObjCInterfaceType +ObjCMessageExpr::ObjCMessageExpr(IdentifierInfo *clsName, Selector selInfo, + QualType retType, ObjCMethodDecl *mproto, + SourceLocation LBrac, SourceLocation RBrac, + Expr **ArgExprs, unsigned nargs) + : Expr(ObjCMessageExprClass, retType), SelName(selInfo), + MethodProto(mproto), ClassName(clsName) { + NumArgs = nargs; + SubExprs = new Expr*[NumArgs+1]; + SubExprs[RECEIVER] = 0; + if (NumArgs) { + for (unsigned i = 0; i != NumArgs; ++i) + SubExprs[i+ARGS_START] = static_cast<Expr *>(ArgExprs[i]); + } + LBracloc = LBrac; + RBracloc = RBrac; +} + + +bool ChooseExpr::isConditionTrue(ASTContext &C) const { + llvm::APSInt CondVal(32); + bool IsConst = getCond()->isIntegerConstantExpr(CondVal, C); + assert(IsConst && "Condition of choose expr must be i-c-e"); IsConst=IsConst; + return CondVal != 0; +} + +static int64_t evaluateOffsetOf(ASTContext& C, const Expr *E) +{ + if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) { + QualType Ty = ME->getBase()->getType(); + + RecordDecl *RD = Ty->getAsRecordType()->getDecl(); + const ASTRecordLayout &RL = C.getASTRecordLayout(RD); + FieldDecl *FD = ME->getMemberDecl(); + + // FIXME: This is linear time. 
+ unsigned i = 0, e = 0; + for (i = 0, e = RD->getNumMembers(); i != e; i++) { + if (RD->getMember(i) == FD) + break; + } + + return RL.getFieldOffset(i) + evaluateOffsetOf(C, ME->getBase()); + } else if (const ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(E)) { + const Expr *Base = ASE->getBase(); + llvm::APSInt Idx(32); + bool ICE = ASE->getIdx()->isIntegerConstantExpr(Idx, C); + assert(ICE && "Array index is not a constant integer!"); + + int64_t size = C.getTypeSize(ASE->getType()); + size *= Idx.getSExtValue(); + + return size + evaluateOffsetOf(C, Base); + } else if (isa<CompoundLiteralExpr>(E)) + return 0; + + assert(0 && "Unknown offsetof subexpression!"); + return 0; +} + +int64_t UnaryOperator::evaluateOffsetOf(ASTContext& C) const +{ + assert(Opc == OffsetOf && "Unary operator not offsetof!"); + + unsigned CharSize = C.Target.getCharWidth(); + return ::evaluateOffsetOf(C, Val) / CharSize; +} + +//===----------------------------------------------------------------------===// +// Child Iterators for iterating over subexpressions/substatements +//===----------------------------------------------------------------------===// + +// DeclRefExpr +Stmt::child_iterator DeclRefExpr::child_begin() { return child_iterator(); } +Stmt::child_iterator DeclRefExpr::child_end() { return child_iterator(); } + +// ObjCIvarRefExpr +Stmt::child_iterator ObjCIvarRefExpr::child_begin() { return child_iterator(); } +Stmt::child_iterator ObjCIvarRefExpr::child_end() { return child_iterator(); } + +// PreDefinedExpr +Stmt::child_iterator PreDefinedExpr::child_begin() { return child_iterator(); } +Stmt::child_iterator PreDefinedExpr::child_end() { return child_iterator(); } + +// IntegerLiteral +Stmt::child_iterator IntegerLiteral::child_begin() { return child_iterator(); } +Stmt::child_iterator IntegerLiteral::child_end() { return child_iterator(); } + +// CharacterLiteral +Stmt::child_iterator CharacterLiteral::child_begin() { return child_iterator(); } 
+Stmt::child_iterator CharacterLiteral::child_end() { return child_iterator(); } + +// FloatingLiteral +Stmt::child_iterator FloatingLiteral::child_begin() { return child_iterator(); } +Stmt::child_iterator FloatingLiteral::child_end() { return child_iterator(); } + +// ImaginaryLiteral +Stmt::child_iterator ImaginaryLiteral::child_begin() { + return reinterpret_cast<Stmt**>(&Val); +} +Stmt::child_iterator ImaginaryLiteral::child_end() { + return reinterpret_cast<Stmt**>(&Val)+1; +} + +// StringLiteral +Stmt::child_iterator StringLiteral::child_begin() { return child_iterator(); } +Stmt::child_iterator StringLiteral::child_end() { return child_iterator(); } + +// ParenExpr +Stmt::child_iterator ParenExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Val); +} +Stmt::child_iterator ParenExpr::child_end() { + return reinterpret_cast<Stmt**>(&Val)+1; +} + +// UnaryOperator +Stmt::child_iterator UnaryOperator::child_begin() { + return reinterpret_cast<Stmt**>(&Val); +} +Stmt::child_iterator UnaryOperator::child_end() { + return reinterpret_cast<Stmt**>(&Val+1); +} + +// SizeOfAlignOfTypeExpr +Stmt::child_iterator SizeOfAlignOfTypeExpr::child_begin() { + // If the type is a VLA type (and not a typedef), the size expression of the + // VLA needs to be treated as an executable expression. 
+ if (VariableArrayType* T = dyn_cast<VariableArrayType>(Ty.getTypePtr())) + return child_iterator(T); + else + return child_iterator(); +} +Stmt::child_iterator SizeOfAlignOfTypeExpr::child_end() { + return child_iterator(); +} + +// ArraySubscriptExpr +Stmt::child_iterator ArraySubscriptExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs); +} +Stmt::child_iterator ArraySubscriptExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs)+END_EXPR; +} + +// CallExpr +Stmt::child_iterator CallExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs[0]); +} +Stmt::child_iterator CallExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs[NumArgs+ARGS_START]); +} + +// MemberExpr +Stmt::child_iterator MemberExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Base); +} +Stmt::child_iterator MemberExpr::child_end() { + return reinterpret_cast<Stmt**>(&Base)+1; +} + +// OCUVectorElementExpr +Stmt::child_iterator OCUVectorElementExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Base); +} +Stmt::child_iterator OCUVectorElementExpr::child_end() { + return reinterpret_cast<Stmt**>(&Base)+1; +} + +// CompoundLiteralExpr +Stmt::child_iterator CompoundLiteralExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Init); +} +Stmt::child_iterator CompoundLiteralExpr::child_end() { + return reinterpret_cast<Stmt**>(&Init)+1; +} + +// ImplicitCastExpr +Stmt::child_iterator ImplicitCastExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Op); +} +Stmt::child_iterator ImplicitCastExpr::child_end() { + return reinterpret_cast<Stmt**>(&Op)+1; +} + +// CastExpr +Stmt::child_iterator CastExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Op); +} +Stmt::child_iterator CastExpr::child_end() { + return reinterpret_cast<Stmt**>(&Op)+1; +} + +// BinaryOperator +Stmt::child_iterator BinaryOperator::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs); +} +Stmt::child_iterator BinaryOperator::child_end() { + return 
reinterpret_cast<Stmt**>(&SubExprs)+END_EXPR; +} + +// ConditionalOperator +Stmt::child_iterator ConditionalOperator::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs); +} +Stmt::child_iterator ConditionalOperator::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs)+END_EXPR; +} + +// AddrLabelExpr +Stmt::child_iterator AddrLabelExpr::child_begin() { return child_iterator(); } +Stmt::child_iterator AddrLabelExpr::child_end() { return child_iterator(); } + +// StmtExpr +Stmt::child_iterator StmtExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubStmt); +} +Stmt::child_iterator StmtExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubStmt)+1; +} + +// TypesCompatibleExpr +Stmt::child_iterator TypesCompatibleExpr::child_begin() { + return child_iterator(); +} + +Stmt::child_iterator TypesCompatibleExpr::child_end() { + return child_iterator(); +} + +// ChooseExpr +Stmt::child_iterator ChooseExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs); +} + +Stmt::child_iterator ChooseExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs)+END_EXPR; +} + +// OverloadExpr +Stmt::child_iterator OverloadExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs[0]); +} +Stmt::child_iterator OverloadExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs[NumExprs]); +} + +// VAArgExpr +Stmt::child_iterator VAArgExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Val); +} + +Stmt::child_iterator VAArgExpr::child_end() { + return reinterpret_cast<Stmt**>(&Val)+1; +} + +// InitListExpr +Stmt::child_iterator InitListExpr::child_begin() { + return reinterpret_cast<Stmt**>(&InitExprs[0]); +} +Stmt::child_iterator InitListExpr::child_end() { + return reinterpret_cast<Stmt**>(&InitExprs[NumInits]); +} + +// ObjCStringLiteral +Stmt::child_iterator ObjCStringLiteral::child_begin() { + return child_iterator(); +} +Stmt::child_iterator ObjCStringLiteral::child_end() { + return child_iterator(); +} + +// ObjCEncodeExpr 
+Stmt::child_iterator ObjCEncodeExpr::child_begin() { return child_iterator(); } +Stmt::child_iterator ObjCEncodeExpr::child_end() { return child_iterator(); } + +// ObjCSelectorExpr +Stmt::child_iterator ObjCSelectorExpr::child_begin() { + return child_iterator(); +} +Stmt::child_iterator ObjCSelectorExpr::child_end() { + return child_iterator(); +} + +// ObjCProtocolExpr +Stmt::child_iterator ObjCProtocolExpr::child_begin() { + return child_iterator(); +} +Stmt::child_iterator ObjCProtocolExpr::child_end() { + return child_iterator(); +} + +// ObjCMessageExpr +Stmt::child_iterator ObjCMessageExpr::child_begin() { + return reinterpret_cast<Stmt**>(&SubExprs[0]); +} +Stmt::child_iterator ObjCMessageExpr::child_end() { + return reinterpret_cast<Stmt**>(&SubExprs[getNumArgs()+ARGS_START]); +} + diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp new file mode 100644 index 00000000000..3bc32e75d87 --- /dev/null +++ b/clang/lib/AST/ExprCXX.cpp @@ -0,0 +1,47 @@ +//===--- ExprCXX.cpp - (C++) Expression AST Node Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the subclesses of Expr class declared in ExprCXX.h +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ExprCXX.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Child Iterators for iterating over subexpressions/substatements +//===----------------------------------------------------------------------===// + + +// CXXCastExpr +Stmt::child_iterator CXXCastExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Op); +} +Stmt::child_iterator CXXCastExpr::child_end() { + return reinterpret_cast<Stmt**>(&Op)+1; +} + +// CXXBoolLiteralExpr +Stmt::child_iterator CXXBoolLiteralExpr::child_begin() { + return child_iterator(); +} +Stmt::child_iterator CXXBoolLiteralExpr::child_end() { + return child_iterator(); +} + +// CXXThrowExpr +Stmt::child_iterator CXXThrowExpr::child_begin() { + return reinterpret_cast<Stmt**>(&Op); +} +Stmt::child_iterator CXXThrowExpr::child_end() { + // If Op is 0, we are processing throw; which has no children. + if (Op == 0) + return reinterpret_cast<Stmt**>(&Op)+0; + return reinterpret_cast<Stmt**>(&Op)+1; +} diff --git a/clang/lib/AST/Makefile b/clang/lib/AST/Makefile new file mode 100644 index 00000000000..cdfc64cacaf --- /dev/null +++ b/clang/lib/AST/Makefile @@ -0,0 +1,22 @@ +##===- clang/lib/AST/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the AST library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME := clangAST +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp new file mode 100644 index 00000000000..572280bc054 --- /dev/null +++ b/clang/lib/AST/Stmt.cpp @@ -0,0 +1,293 @@ +//===--- Stmt.cpp - Statement AST Node Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Stmt class and statement subclasses. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Stmt.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/IdentifierTable.h" +using namespace clang; + +static struct StmtClassNameTable { + const char *Name; + unsigned Counter; + unsigned Size; +} StmtClassInfo[Stmt::lastExprConstant+1]; + +static StmtClassNameTable &getStmtInfoTableEntry(Stmt::StmtClass E) { + static bool Initialized = false; + if (Initialized) + return StmtClassInfo[E]; + + // Intialize the table on the first use. + Initialized = true; +#define STMT(N, CLASS, PARENT) \ + StmtClassInfo[N].Name = #CLASS; \ + StmtClassInfo[N].Size = sizeof(CLASS); +#include "clang/AST/StmtNodes.def" + + return StmtClassInfo[E]; +} + +const char *Stmt::getStmtClassName() const { + return getStmtInfoTableEntry(sClass).Name; +} + +void Stmt::PrintStats() { + // Ensure the table is primed. 
+ getStmtInfoTableEntry(Stmt::NullStmtClass); + + unsigned sum = 0; + fprintf(stderr, "*** Stmt/Expr Stats:\n"); + for (int i = 0; i != Stmt::lastExprConstant+1; i++) { + if (StmtClassInfo[i].Name == 0) continue; + sum += StmtClassInfo[i].Counter; + } + fprintf(stderr, " %d stmts/exprs total.\n", sum); + sum = 0; + for (int i = 0; i != Stmt::lastExprConstant+1; i++) { + if (StmtClassInfo[i].Name == 0) continue; + fprintf(stderr, " %d %s, %d each (%d bytes)\n", + StmtClassInfo[i].Counter, StmtClassInfo[i].Name, + StmtClassInfo[i].Size, + StmtClassInfo[i].Counter*StmtClassInfo[i].Size); + sum += StmtClassInfo[i].Counter*StmtClassInfo[i].Size; + } + fprintf(stderr, "Total bytes = %d\n", sum); +} + +void Stmt::addStmtClass(StmtClass s) { + ++getStmtInfoTableEntry(s).Counter; +} + +static bool StatSwitch = false; + +bool Stmt::CollectingStats(bool enable) { + if (enable) StatSwitch = true; + return StatSwitch; +} + + +const char *LabelStmt::getName() const { + return getID()->getName(); +} + +// This is defined here to avoid polluting Stmt.h with importing Expr.h +SourceRange ReturnStmt::getSourceRange() const { + if (RetExpr) + return SourceRange(RetLoc, RetExpr->getLocEnd()); + else + return SourceRange(RetLoc); +} + +bool Stmt::hasImplicitControlFlow() const { + switch (sClass) { + default: + return false; + + case CallExprClass: + case ConditionalOperatorClass: + case ChooseExprClass: + case StmtExprClass: + case DeclStmtClass: + return true; + + case Stmt::BinaryOperatorClass: { + const BinaryOperator* B = cast<BinaryOperator>(this); + if (B->isLogicalOp() || B->getOpcode() == BinaryOperator::Comma) + return true; + else + return false; + } + } +} + +//===----------------------------------------------------------------------===// +// Constructors +//===----------------------------------------------------------------------===// + +AsmStmt::AsmStmt(SourceLocation asmloc, bool issimple, bool isvolatile, + unsigned numoutputs, unsigned numinputs, + std::string *names, 
StringLiteral **constraints, + Expr **exprs, StringLiteral *asmstr, unsigned numclobbers, + StringLiteral **clobbers, SourceLocation rparenloc) + : Stmt(AsmStmtClass), AsmLoc(asmloc), RParenLoc(rparenloc), AsmStr(asmstr) + , IsSimple(issimple), IsVolatile(isvolatile) + , NumOutputs(numoutputs), NumInputs(numinputs) { + for (unsigned i = 0, e = numinputs + numoutputs; i != e; i++) { + Names.push_back(names[i]); + Exprs.push_back(exprs[i]); + Constraints.push_back(constraints[i]); + } + + for (unsigned i = 0; i != numclobbers; i++) + Clobbers.push_back(clobbers[i]); +} + +ObjCForCollectionStmt::ObjCForCollectionStmt(Stmt *Elem, Expr *Collect, + Stmt *Body, SourceLocation FCL, + SourceLocation RPL) +: Stmt(ObjCForCollectionStmtClass) { + SubExprs[ELEM] = Elem; + SubExprs[COLLECTION] = reinterpret_cast<Stmt*>(Collect); + SubExprs[BODY] = Body; + ForLoc = FCL; + RParenLoc = RPL; +} + + +/// ObjCAtCatchStmt - Build an @catch node from its catch-variable statement +/// and body. When atCatchList is non-null, the new node is appended to the +/// end of that existing chain of @catch statements. +ObjCAtCatchStmt::ObjCAtCatchStmt(SourceLocation atCatchLoc, + SourceLocation rparenloc, + Stmt *catchVarStmtDecl, Stmt *atCatchStmt, + Stmt *atCatchList) +: Stmt(ObjCAtCatchStmtClass) { + SubExprs[SELECTOR] = catchVarStmtDecl; + SubExprs[BODY] = atCatchStmt; + // The new node is always the tail of its chain, so its successor slot must + // be null in both cases. Leaving it unset when appending would hand an + // indeterminate pointer to the next walk over the chain. + SubExprs[NEXT_CATCH] = NULL; + if (atCatchList) { + ObjCAtCatchStmt *AtCatchList = static_cast<ObjCAtCatchStmt*>(atCatchList); + + // Advance to the last @catch currently in the chain. + while (ObjCAtCatchStmt* NextCatch = AtCatchList->getNextCatchStmt()) + AtCatchList = NextCatch; + + AtCatchList->SubExprs[NEXT_CATCH] = this; + } + AtCatchLoc = atCatchLoc; + RParenLoc = rparenloc; +} + + +//===----------------------------------------------------------------------===// +// Child Iterators for iterating over subexpressions/substatements +//===----------------------------------------------------------------------===// + +// DeclStmt +Stmt::child_iterator DeclStmt::child_begin() { return getDecl(); } +Stmt::child_iterator DeclStmt::child_end() { return child_iterator(); } + +// NullStmt +Stmt::child_iterator NullStmt::child_begin() { return child_iterator(); } +Stmt::child_iterator
NullStmt::child_end() { return child_iterator(); } + +// CompoundStmt +Stmt::child_iterator CompoundStmt::child_begin() { return &Body[0]; } +Stmt::child_iterator CompoundStmt::child_end() { return &Body[0]+Body.size(); } + +// CaseStmt +Stmt::child_iterator CaseStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator CaseStmt::child_end() { return &SubExprs[END_EXPR]; } + +// DefaultStmt +Stmt::child_iterator DefaultStmt::child_begin() { return &SubStmt; } +Stmt::child_iterator DefaultStmt::child_end() { return &SubStmt+1; } + +// LabelStmt +Stmt::child_iterator LabelStmt::child_begin() { return &SubStmt; } +Stmt::child_iterator LabelStmt::child_end() { return &SubStmt+1; } + +// IfStmt +Stmt::child_iterator IfStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator IfStmt::child_end() { return &SubExprs[0]+END_EXPR; } + +// SwitchStmt +Stmt::child_iterator SwitchStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator SwitchStmt::child_end() { return &SubExprs[0]+END_EXPR; } + +// WhileStmt +Stmt::child_iterator WhileStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator WhileStmt::child_end() { return &SubExprs[0]+END_EXPR; } + +// DoStmt +Stmt::child_iterator DoStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator DoStmt::child_end() { return &SubExprs[0]+END_EXPR; } + +// ForStmt +Stmt::child_iterator ForStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator ForStmt::child_end() { return &SubExprs[0]+END_EXPR; } + +// ObjCForCollectionStmt +Stmt::child_iterator ObjCForCollectionStmt::child_begin() { + return &SubExprs[0]; +} +Stmt::child_iterator ObjCForCollectionStmt::child_end() { + return &SubExprs[0]+END_EXPR; +} + +// GotoStmt +Stmt::child_iterator GotoStmt::child_begin() { return child_iterator(); } +Stmt::child_iterator GotoStmt::child_end() { return child_iterator(); } + +// IndirectGotoStmt +Stmt::child_iterator IndirectGotoStmt::child_begin() { + return 
reinterpret_cast<Stmt**>(&Target); +} + +Stmt::child_iterator IndirectGotoStmt::child_end() { return ++child_begin(); } + +// ContinueStmt +Stmt::child_iterator ContinueStmt::child_begin() { return child_iterator(); } +Stmt::child_iterator ContinueStmt::child_end() { return child_iterator(); } + +// BreakStmt +Stmt::child_iterator BreakStmt::child_begin() { return child_iterator(); } +Stmt::child_iterator BreakStmt::child_end() { return child_iterator(); } + +// ReturnStmt +Stmt::child_iterator ReturnStmt::child_begin() { + if (RetExpr) return reinterpret_cast<Stmt**>(&RetExpr); + else return child_iterator(); +} + +Stmt::child_iterator ReturnStmt::child_end() { + if (RetExpr) return reinterpret_cast<Stmt**>(&RetExpr)+1; + else return child_iterator(); +} + +// AsmStmt +Stmt::child_iterator AsmStmt::child_begin() { return child_iterator(); } +Stmt::child_iterator AsmStmt::child_end() { return child_iterator(); } + +// ObjCAtCatchStmt +Stmt::child_iterator ObjCAtCatchStmt::child_begin() { return &SubExprs[0]; } +Stmt::child_iterator ObjCAtCatchStmt::child_end() { + return &SubExprs[0]+END_EXPR; +} + +// ObjCAtFinallyStmt +Stmt::child_iterator ObjCAtFinallyStmt::child_begin() { return &AtFinallyStmt; } +Stmt::child_iterator ObjCAtFinallyStmt::child_end() { return &AtFinallyStmt+1; } + +// ObjCAtTryStmt +Stmt::child_iterator ObjCAtTryStmt::child_begin() { return &SubStmts[0]; } +Stmt::child_iterator ObjCAtTryStmt::child_end() { + return &SubStmts[0]+END_EXPR; +} + +// ObjCAtThrowStmt +Stmt::child_iterator ObjCAtThrowStmt::child_begin() { + return &Throw; +} + +Stmt::child_iterator ObjCAtThrowStmt::child_end() { + return &Throw+1; +} + +// ObjCAtSynchronizedStmt +Stmt::child_iterator ObjCAtSynchronizedStmt::child_begin() { + return &SubStmts[0]; +} + +Stmt::child_iterator ObjCAtSynchronizedStmt::child_end() { + return &SubStmts[0]+END_EXPR; +} + diff --git a/clang/lib/AST/StmtDumper.cpp b/clang/lib/AST/StmtDumper.cpp new file mode 100644 index 00000000000..d813899b530 
--- /dev/null +++ b/clang/lib/AST/StmtDumper.cpp @@ -0,0 +1,486 @@ +//===--- StmtDumper.cpp - Dumping implementation for Stmt ASTs ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Stmt::dump/Stmt::print methods, which dump out the +// AST in a form that exposes type details and other fields. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/Compiler.h" +#include <cstdio> +using namespace clang; + +//===----------------------------------------------------------------------===// +// StmtDumper Visitor +//===----------------------------------------------------------------------===// + +namespace { + class VISIBILITY_HIDDEN StmtDumper : public StmtVisitor<StmtDumper> { + SourceManager *SM; + FILE *F; + unsigned IndentLevel; + + /// MaxDepth - When doing a normal dump (not dumpAll) we only want to dump + /// the first few levels of an AST. This keeps track of how many ast levels + /// are left. + unsigned MaxDepth; + + /// LastLocFilename/LastLocLine - Keep track of the last location we print + /// out so that we can print out deltas from then on out. + const char *LastLocFilename; + unsigned LastLocLine; + public: + StmtDumper(SourceManager *sm, FILE *f, unsigned maxDepth) + : SM(sm), F(f), IndentLevel(0-1), MaxDepth(maxDepth) { + LastLocFilename = ""; + LastLocLine = ~0U; + } + + void DumpSubTree(Stmt *S) { + // Prune the recursion if not using dump all. 
+ if (MaxDepth == 0) return; + + ++IndentLevel; + if (S) { + if (DeclStmt* DS = dyn_cast<DeclStmt>(S)) + VisitDeclStmt(DS); + else { + Visit(S); + + // Print out children. + Stmt::child_iterator CI = S->child_begin(), CE = S->child_end(); + if (CI != CE) { + while (CI != CE) { + fprintf(F, "\n"); + DumpSubTree(*CI++); + } + } + fprintf(F, ")"); + } + } else { + Indent(); + fprintf(F, "<<<NULL>>>"); + } + --IndentLevel; + } + + void DumpDeclarator(Decl *D); + + void Indent() const { + for (int i = 0, e = IndentLevel; i < e; ++i) + fprintf(F, " "); + } + + void DumpType(QualType T) { + fprintf(F, "'%s'", T.getAsString().c_str()); + + // If the type is directly a typedef, strip off typedefness to give at + // least one level of concreteness. + if (TypedefType *TDT = dyn_cast<TypedefType>(T)) + fprintf(F, ":'%s'", TDT->LookThroughTypedefs().getAsString().c_str()); + } + void DumpStmt(const Stmt *Node) { + Indent(); + fprintf(F, "(%s %p", Node->getStmtClassName(), (void*)Node); + DumpSourceRange(Node); + } + void DumpExpr(const Expr *Node) { + DumpStmt(Node); + fprintf(F, " "); + DumpType(Node->getType()); + } + void DumpSourceRange(const Stmt *Node); + void DumpLocation(SourceLocation Loc); + + // Stmts. 
+ void VisitStmt(Stmt *Node); + void VisitDeclStmt(DeclStmt *Node); + void VisitLabelStmt(LabelStmt *Node); + void VisitGotoStmt(GotoStmt *Node); + + // Exprs + void VisitExpr(Expr *Node); + void VisitDeclRefExpr(DeclRefExpr *Node); + void VisitPreDefinedExpr(PreDefinedExpr *Node); + void VisitCharacterLiteral(CharacterLiteral *Node); + void VisitIntegerLiteral(IntegerLiteral *Node); + void VisitFloatingLiteral(FloatingLiteral *Node); + void VisitStringLiteral(StringLiteral *Str); + void VisitUnaryOperator(UnaryOperator *Node); + void VisitSizeOfAlignOfTypeExpr(SizeOfAlignOfTypeExpr *Node); + void VisitMemberExpr(MemberExpr *Node); + void VisitOCUVectorElementExpr(OCUVectorElementExpr *Node); + void VisitBinaryOperator(BinaryOperator *Node); + void VisitCompoundAssignOperator(CompoundAssignOperator *Node); + void VisitAddrLabelExpr(AddrLabelExpr *Node); + void VisitTypesCompatibleExpr(TypesCompatibleExpr *Node); + + // C++ + void VisitCXXCastExpr(CXXCastExpr *Node); + void VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *Node); + + // ObjC + void VisitObjCEncodeExpr(ObjCEncodeExpr *Node); + void VisitObjCMessageExpr(ObjCMessageExpr* Node); + void VisitObjCSelectorExpr(ObjCSelectorExpr *Node); + void VisitObjCProtocolExpr(ObjCProtocolExpr *Node); + void VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node); + }; +} + +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + +void StmtDumper::DumpLocation(SourceLocation Loc) { + SourceLocation PhysLoc = SM->getPhysicalLoc(Loc); + + // The general format we print out is filename:line:col, but we drop pieces + // that haven't changed since the last loc printed. 
+ const char *Filename = SM->getSourceName(PhysLoc); + unsigned LineNo = SM->getLineNumber(PhysLoc); + // Print to F, the stream the rest of StmtDumper writes to, so the location + // text ends up in the same output as the node header it belongs to. + if (strcmp(Filename, LastLocFilename) != 0) { + fprintf(F, "%s:%u:%u", Filename, LineNo, SM->getColumnNumber(PhysLoc)); + LastLocFilename = Filename; + LastLocLine = LineNo; + } else if (LineNo != LastLocLine) { + fprintf(F, "line:%u:%u", LineNo, SM->getColumnNumber(PhysLoc)); + LastLocLine = LineNo; + } else { + fprintf(F, "col:%u", SM->getColumnNumber(PhysLoc)); + } +} + +/// DumpSourceRange - Print " <begin>" or " <begin, end>" for Node's source +/// range to F. Prints nothing when the dumper has no SourceManager. +void StmtDumper::DumpSourceRange(const Stmt *Node) { + // Can't translate locations if a SourceManager isn't available. + if (SM == 0) return; + + // TODO: If the parent expression is available, we can print a delta vs its + // location. + SourceRange R = Node->getSourceRange(); + + fprintf(F, " <"); + DumpLocation(R.getBegin()); + if (R.getBegin() != R.getEnd()) { + fprintf(F, ", "); + DumpLocation(R.getEnd()); + } + fprintf(F, ">"); + + // <t2.c:123:421[blah], t2.c:412:321> + +} + + +//===----------------------------------------------------------------------===// +// Stmt printing methods. +//===----------------------------------------------------------------------===// + +void StmtDumper::VisitStmt(Stmt *Node) { + DumpStmt(Node); +} + +void StmtDumper::DumpDeclarator(Decl *D) { + // FIXME: Need to complete/beautify this... this code simply shows the + // nodes are where they need to be. + if (TypedefDecl *localType = dyn_cast<TypedefDecl>(D)) { + fprintf(F, "\"typedef %s %s\"", + localType->getUnderlyingType().getAsString().c_str(), + localType->getName()); + } else if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) { + fprintf(F, "\""); + // Emit storage class for vardecls.
+ if (VarDecl *V = dyn_cast<VarDecl>(VD)) { + switch (V->getStorageClass()) { + default: assert(0 && "Unknown storage class!"); + case VarDecl::None: break; + case VarDecl::Extern: fprintf(F, "extern "); break; + case VarDecl::Static: fprintf(F, "static "); break; + case VarDecl::Auto: fprintf(F, "auto "); break; + case VarDecl::Register: fprintf(F, "register "); break; + } + } + + std::string Name = VD->getName(); + VD->getType().getAsStringInternal(Name); + fprintf(F, "%s", Name.c_str()); + + // If this is a vardecl with an initializer, emit it. + if (VarDecl *V = dyn_cast<VarDecl>(VD)) { + if (V->getInit()) { + fprintf(F, " =\n"); + DumpSubTree(V->getInit()); + } + } + fprintf(F, "\""); + } else if (TagDecl *TD = dyn_cast<TagDecl>(D)) { + // print a free standing tag decl (e.g. "struct x;"). + const char *tagname; + if (const IdentifierInfo *II = TD->getIdentifier()) + tagname = II->getName(); + else + tagname = "<anonymous>"; + fprintf(F, "\"%s %s;\"", TD->getKindName(), tagname); + // FIXME: print tag bodies. + } else { + assert(0 && "Unexpected decl"); + } +} + +void StmtDumper::VisitDeclStmt(DeclStmt *Node) { + DumpStmt(Node); + fprintf(F,"\n"); + for (ScopedDecl *D = Node->getDecl(); D; D = D->getNextDeclarator()) { + ++IndentLevel; + Indent(); + fprintf(F, "%p ", (void*) D); + DumpDeclarator(D); + if (D->getNextDeclarator()) + fprintf(F,"\n"); + --IndentLevel; + } +} + +void StmtDumper::VisitLabelStmt(LabelStmt *Node) { + DumpStmt(Node); + fprintf(F, " '%s'\n", Node->getName()); +} + +void StmtDumper::VisitGotoStmt(GotoStmt *Node) { + DumpStmt(Node); + fprintf(F, " '%s':%p", Node->getLabel()->getName(), (void*)Node->getLabel()); +} + +//===----------------------------------------------------------------------===// +// Expr printing methods. 
+//===----------------------------------------------------------------------===// + +void StmtDumper::VisitExpr(Expr *Node) { + DumpExpr(Node); +} + +void StmtDumper::VisitDeclRefExpr(DeclRefExpr *Node) { + DumpExpr(Node); + + fprintf(F, " "); + switch (Node->getDecl()->getKind()) { + case Decl::Function: fprintf(F,"FunctionDecl"); break; + case Decl::BlockVar: fprintf(F,"BlockVar"); break; + case Decl::FileVar: fprintf(F,"FileVar"); break; + case Decl::ParmVar: fprintf(F,"ParmVar"); break; + case Decl::EnumConstant: fprintf(F,"EnumConstant"); break; + case Decl::Typedef: fprintf(F,"Typedef"); break; + case Decl::Struct: fprintf(F,"Struct"); break; + case Decl::Union: fprintf(F,"Union"); break; + case Decl::Class: fprintf(F,"Class"); break; + case Decl::Enum: fprintf(F,"Enum"); break; + case Decl::ObjCInterface: fprintf(F,"ObjCInterface"); break; + case Decl::ObjCClass: fprintf(F,"ObjCClass"); break; + default: fprintf(F,"Decl"); break; + } + + fprintf(F, "='%s' %p", Node->getDecl()->getName(), (void*)Node->getDecl()); +} + +void StmtDumper::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) { + DumpExpr(Node->getBase()); + + fprintf(F, " ObjCIvarRefExpr"); + fprintf(F, "='%s' %p", Node->getDecl()->getName(), (void*)Node->getDecl()); +} + +void StmtDumper::VisitPreDefinedExpr(PreDefinedExpr *Node) { + DumpExpr(Node); + switch (Node->getIdentType()) { + default: + assert(0 && "unknown case"); + case PreDefinedExpr::Func: + fprintf(F, " __func__"); + break; + case PreDefinedExpr::Function: + fprintf(F, " __FUNCTION__"); + break; + case PreDefinedExpr::PrettyFunction: + fprintf(F, " __PRETTY_FUNCTION__"); + break; + } +} + +void StmtDumper::VisitCharacterLiteral(CharacterLiteral *Node) { + DumpExpr(Node); + fprintf(F, " %d", Node->getValue()); +} + +void StmtDumper::VisitIntegerLiteral(IntegerLiteral *Node) { + DumpExpr(Node); + + bool isSigned = Node->getType()->isSignedIntegerType(); + fprintf(F, " %s", Node->getValue().toString(10, isSigned).c_str()); +} +void 
StmtDumper::VisitFloatingLiteral(FloatingLiteral *Node) { + DumpExpr(Node); + fprintf(F, " %f", Node->getValueAsDouble()); +} + +void StmtDumper::VisitStringLiteral(StringLiteral *Str) { + DumpExpr(Str); + // FIXME: this doesn't print wstrings right. + fprintf(F, " %s\"", Str->isWide() ? "L" : ""); + + for (unsigned i = 0, e = Str->getByteLength(); i != e; ++i) { + switch (char C = Str->getStrData()[i]) { + default: + if (isprint(C)) + fputc(C, F); + else + fprintf(F, "\\%03o", C); + break; + // Handle some common ones to make dumps prettier. + case '\\': fprintf(F, "\\\\"); break; + case '"': fprintf(F, "\\\""); break; + case '\n': fprintf(F, "\\n"); break; + case '\t': fprintf(F, "\\t"); break; + case '\a': fprintf(F, "\\a"); break; + case '\b': fprintf(F, "\\b"); break; + } + } + fprintf(F, "\""); +} + +void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) { + DumpExpr(Node); + fprintf(F, " %s '%s'", Node->isPostfix() ? "postfix" : "prefix", + UnaryOperator::getOpcodeStr(Node->getOpcode())); +} +void StmtDumper::VisitSizeOfAlignOfTypeExpr(SizeOfAlignOfTypeExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s ", Node->isSizeOf() ? "sizeof" : "alignof"); + DumpType(Node->getArgumentType()); +} + +void StmtDumper::VisitMemberExpr(MemberExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s%s %p", Node->isArrow() ? "->" : ".", + Node->getMemberDecl()->getName(), (void*)Node->getMemberDecl()); +} +void StmtDumper::VisitOCUVectorElementExpr(OCUVectorElementExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s", Node->getAccessor().getName()); +} +void StmtDumper::VisitBinaryOperator(BinaryOperator *Node) { + DumpExpr(Node); + fprintf(F, " '%s'", BinaryOperator::getOpcodeStr(Node->getOpcode())); +} +void StmtDumper::VisitCompoundAssignOperator(CompoundAssignOperator *Node) { + DumpExpr(Node); + fprintf(F, " '%s' ComputeTy=", + BinaryOperator::getOpcodeStr(Node->getOpcode())); + DumpType(Node->getComputationType()); +} + +// GNU extensions. 
+ +void StmtDumper::VisitAddrLabelExpr(AddrLabelExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s %p", Node->getLabel()->getName(), (void*)Node->getLabel()); +} + +void StmtDumper::VisitTypesCompatibleExpr(TypesCompatibleExpr *Node) { + DumpExpr(Node); + fprintf(F, " "); + DumpType(Node->getArgType1()); + fprintf(F, " "); + DumpType(Node->getArgType2()); +} + +//===----------------------------------------------------------------------===// +// C++ Expressions +//===----------------------------------------------------------------------===// + +void StmtDumper::VisitCXXCastExpr(CXXCastExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s", CXXCastExpr::getOpcodeStr(Node->getOpcode())); +} + +void StmtDumper::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *Node) { + DumpExpr(Node); + fprintf(F, " %s", Node->getValue() ? "true" : "false"); +} + +//===----------------------------------------------------------------------===// +// Obj-C Expressions +//===----------------------------------------------------------------------===// + +void StmtDumper::VisitObjCMessageExpr(ObjCMessageExpr* Node) { + DumpExpr(Node); + fprintf(F, " selector=%s", Node->getSelector().getName().c_str()); +} + +void StmtDumper::VisitObjCEncodeExpr(ObjCEncodeExpr *Node) { + DumpExpr(Node); + + fprintf(F, " "); + DumpType(Node->getEncodedType()); +} + +void StmtDumper::VisitObjCSelectorExpr(ObjCSelectorExpr *Node) { + DumpExpr(Node); + + fprintf(F, " "); + Selector &selector = Node->getSelector(); + fprintf(F, "%s", selector.getName().c_str()); +} + +void StmtDumper::VisitObjCProtocolExpr(ObjCProtocolExpr *Node) { + DumpExpr(Node); + + fprintf(F, " "); + fprintf(F, "%s", Node->getProtocol()->getName()); +} +//===----------------------------------------------------------------------===// +// Stmt method implementations +//===----------------------------------------------------------------------===// + +/// dump - This does a local dump of the specified AST fragment. 
It dumps the +/// specified node and a few nodes underneath it, but not the whole subtree. +/// This is useful in a debugger. +void Stmt::dump(SourceManager &SM) const { + StmtDumper P(&SM, stderr, 4); + P.DumpSubTree(const_cast<Stmt*>(this)); + fprintf(stderr, "\n"); +} + +/// dump - This does a local dump of the specified AST fragment. It dumps the +/// specified node and a few nodes underneath it, but not the whole subtree. +/// This is useful in a debugger. +void Stmt::dump() const { + StmtDumper P(0, stderr, 4); + P.DumpSubTree(const_cast<Stmt*>(this)); + fprintf(stderr, "\n"); +} + +/// dumpAll - This does a dump of the specified AST fragment and all subtrees. +void Stmt::dumpAll(SourceManager &SM) const { + StmtDumper P(&SM, stderr, ~0U); + P.DumpSubTree(const_cast<Stmt*>(this)); + fprintf(stderr, "\n"); +} + +/// dumpAll - This does a dump of the specified AST fragment and all subtrees. +void Stmt::dumpAll() const { + StmtDumper P(0, stderr, ~0U); + P.DumpSubTree(const_cast<Stmt*>(this)); + fprintf(stderr, "\n"); +} diff --git a/clang/lib/AST/StmtIterator.cpp b/clang/lib/AST/StmtIterator.cpp new file mode 100644 index 00000000000..14083e30a99 --- /dev/null +++ b/clang/lib/AST/StmtIterator.cpp @@ -0,0 +1,118 @@ +//===--- StmtIterator.cpp - Iterators for Statements ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines internal methods for StmtIterator. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtIterator.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Decl.h" + +using namespace clang; + +static inline VariableArrayType* FindVA(Type* t) { + while (ArrayType* vt = dyn_cast<ArrayType>(t)) { + if (VariableArrayType* vat = dyn_cast<VariableArrayType>(vt)) + if (vat->getSizeExpr()) + return vat; + + t = vt->getElementType().getTypePtr(); + } + + return NULL; +} + +void StmtIteratorBase::NextVA() { + assert (getVAPtr()); + + VariableArrayType* p = getVAPtr(); + p = FindVA(p->getElementType().getTypePtr()); + setVAPtr(p); + + if (!p && decl) { + if (VarDecl* VD = dyn_cast<VarDecl>(decl)) + if (VD->Init) + return; + + NextDecl(); + } + else { + RawVAPtr = 0; + } +} + +void StmtIteratorBase::NextDecl(bool ImmediateAdvance) { + assert (inDecl()); + assert (getVAPtr() == NULL); + assert (decl); + + if (ImmediateAdvance) { + decl = decl->getNextDeclarator(); + + if (!decl) { + RawVAPtr = 0; + return; + } + } + + for ( ; decl ; decl = decl->getNextDeclarator()) { + if (VarDecl* VD = dyn_cast<VarDecl>(decl)) { + if (VariableArrayType* VAPtr = FindVA(VD->getType().getTypePtr())) { + setVAPtr(VAPtr); + return; + } + + if (VD->getInit()) + return; + } + else if (TypedefDecl* TD = dyn_cast<TypedefDecl>(decl)) { + if (VariableArrayType* VAPtr = + FindVA(TD->getUnderlyingType().getTypePtr())) { + setVAPtr(VAPtr); + return; + } + } + else if (EnumConstantDecl* ECD = dyn_cast<EnumConstantDecl>(decl)) + if (ECD->getInitExpr()) + return; + } + + if (!decl) { + RawVAPtr = 0; + return; + } +} + +StmtIteratorBase::StmtIteratorBase(ScopedDecl* d) + : decl(d), RawVAPtr(DeclMode) { + assert (decl); + NextDecl(false); +} + +StmtIteratorBase::StmtIteratorBase(VariableArrayType* t) +: decl(NULL), RawVAPtr(SizeOfTypeVAMode) { + RawVAPtr |= reinterpret_cast<uintptr_t>(t); +} + + +Stmt*& StmtIteratorBase::GetDeclExpr() const { + if (VariableArrayType* VAPtr = getVAPtr()) { + 
assert (VAPtr->SizeExpr); + return reinterpret_cast<Stmt*&>(VAPtr->SizeExpr); + } + + if (VarDecl* VD = dyn_cast<VarDecl>(decl)) { + assert (VD->Init); + return reinterpret_cast<Stmt*&>(VD->Init); + } + + EnumConstantDecl* ECD = cast<EnumConstantDecl>(decl); + return reinterpret_cast<Stmt*&>(ECD->Init); +} diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp new file mode 100644 index 00000000000..ba82b7fcff7 --- /dev/null +++ b/clang/lib/AST/StmtPrinter.cpp @@ -0,0 +1,854 @@ +//===--- StmtPrinter.cpp - Printing implementation for Stmt ASTs ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Stmt::dumpPretty/Stmt::printPretty methods, which +// pretty print the AST back out to C code. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/Basic/IdentifierTable.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Streams.h" +#include <iomanip> +using namespace clang; + +//===----------------------------------------------------------------------===// +// StmtPrinter Visitor +//===----------------------------------------------------------------------===// + +namespace { + class VISIBILITY_HIDDEN StmtPrinter : public StmtVisitor<StmtPrinter> { + std::ostream &OS; + unsigned IndentLevel; + clang::PrinterHelper* Helper; + public: + StmtPrinter(std::ostream &os, PrinterHelper* helper) : + OS(os), IndentLevel(0), Helper(helper) {} + + void PrintStmt(Stmt *S, int SubIndent = 1) { + IndentLevel += SubIndent; + if (S && isa<Expr>(S)) { + // If this is an expr used in a stmt context, indent 
and newline it. + Indent(); + Visit(S); + OS << ";\n"; + } else if (S) { + Visit(S); + } else { + Indent() << "<<<NULL STATEMENT>>>\n"; + } + IndentLevel -= SubIndent; + } + + void PrintRawCompoundStmt(CompoundStmt *S); + void PrintRawDecl(Decl *D); + void PrintRawIfStmt(IfStmt *If); + + void PrintExpr(Expr *E) { + if (E) + Visit(E); + else + OS << "<null expr>"; + } + + std::ostream &Indent(int Delta = 0) const { + for (int i = 0, e = IndentLevel+Delta; i < e; ++i) + OS << " "; + return OS; + } + + bool PrintOffsetOfDesignator(Expr *E); + void VisitUnaryOffsetOf(UnaryOperator *Node); + + void Visit(Stmt* S) { + if (Helper && Helper->handledStmt(S,OS)) + return; + else StmtVisitor<StmtPrinter>::Visit(S); + } + + void VisitStmt(Stmt *Node); +#define STMT(N, CLASS, PARENT) \ + void Visit##CLASS(CLASS *Node); +#include "clang/AST/StmtNodes.def" + }; +} + +//===----------------------------------------------------------------------===// +// Stmt printing methods. +//===----------------------------------------------------------------------===// + +void StmtPrinter::VisitStmt(Stmt *Node) { + Indent() << "<<unknown stmt type>>\n"; +} + +/// PrintRawCompoundStmt - Print a compound stmt without indenting the {, and +/// with no newline after the }. +void StmtPrinter::PrintRawCompoundStmt(CompoundStmt *Node) { + OS << "{\n"; + for (CompoundStmt::body_iterator I = Node->body_begin(), E = Node->body_end(); + I != E; ++I) + PrintStmt(*I); + + Indent() << "}"; +} + +void StmtPrinter::PrintRawDecl(Decl *D) { + // FIXME: Need to complete/beautify this... this code simply shows the + // nodes are where they need to be. + if (TypedefDecl *localType = dyn_cast<TypedefDecl>(D)) { + OS << "typedef " << localType->getUnderlyingType().getAsString(); + OS << " " << localType->getName(); + } else if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) { + // Emit storage class for vardecls. 
+ if (VarDecl *V = dyn_cast<VarDecl>(VD)) { + switch (V->getStorageClass()) { + default: assert(0 && "Unknown storage class!"); + case VarDecl::None: break; + case VarDecl::Extern: OS << "extern "; break; + case VarDecl::Static: OS << "static "; break; + case VarDecl::Auto: OS << "auto "; break; + case VarDecl::Register: OS << "register "; break; + } + } + + std::string Name = VD->getName(); + VD->getType().getAsStringInternal(Name); + OS << Name; + + // If this is a vardecl with an initializer, emit it. + if (VarDecl *V = dyn_cast<VarDecl>(VD)) { + if (V->getInit()) { + OS << " = "; + PrintExpr(V->getInit()); + } + } + } else if (TagDecl *TD = dyn_cast<TagDecl>(D)) { + // print a free standing tag decl (e.g. "struct x;"). + OS << TD->getKindName(); + OS << " "; + if (const IdentifierInfo *II = TD->getIdentifier()) + OS << II->getName(); + else + OS << "<anonymous>"; + // FIXME: print tag bodies. + } else { + assert(0 && "Unexpected decl"); + } +} + + +void StmtPrinter::VisitNullStmt(NullStmt *Node) { + Indent() << ";\n"; +} + +void StmtPrinter::VisitDeclStmt(DeclStmt *Node) { + for (ScopedDecl *D = Node->getDecl(); D; D = D->getNextDeclarator()) { + Indent(); + PrintRawDecl(D); + OS << ";\n"; + } +} + +void StmtPrinter::VisitCompoundStmt(CompoundStmt *Node) { + Indent(); + PrintRawCompoundStmt(Node); + OS << "\n"; +} + +void StmtPrinter::VisitCaseStmt(CaseStmt *Node) { + Indent(-1) << "case "; + PrintExpr(Node->getLHS()); + if (Node->getRHS()) { + OS << " ... 
"; + PrintExpr(Node->getRHS()); + } + OS << ":\n"; + + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::VisitDefaultStmt(DefaultStmt *Node) { + Indent(-1) << "default:\n"; + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::VisitLabelStmt(LabelStmt *Node) { + Indent(-1) << Node->getName() << ":\n"; + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::PrintRawIfStmt(IfStmt *If) { + OS << "if "; + PrintExpr(If->getCond()); + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(If->getThen())) { + OS << ' '; + PrintRawCompoundStmt(CS); + OS << (If->getElse() ? ' ' : '\n'); + } else { + OS << '\n'; + PrintStmt(If->getThen()); + if (If->getElse()) Indent(); + } + + if (Stmt *Else = If->getElse()) { + OS << "else"; + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Else)) { + OS << ' '; + PrintRawCompoundStmt(CS); + OS << '\n'; + } else if (IfStmt *ElseIf = dyn_cast<IfStmt>(Else)) { + OS << ' '; + PrintRawIfStmt(ElseIf); + } else { + OS << '\n'; + PrintStmt(If->getElse()); + } + } +} + +void StmtPrinter::VisitIfStmt(IfStmt *If) { + Indent(); + PrintRawIfStmt(If); +} + +void StmtPrinter::VisitSwitchStmt(SwitchStmt *Node) { + Indent() << "switch ("; + PrintExpr(Node->getCond()); + OS << ")"; + + // Pretty print compoundstmt bodies (very common). 
+ if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) { + OS << " "; + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } +} + +void StmtPrinter::VisitSwitchCase(SwitchCase*) { + assert(0 && "SwitchCase is an abstract class"); +} + +void StmtPrinter::VisitWhileStmt(WhileStmt *Node) { + Indent() << "while ("; + PrintExpr(Node->getCond()); + OS << ")\n"; + PrintStmt(Node->getBody()); +} + +void StmtPrinter::VisitDoStmt(DoStmt *Node) { + Indent() << "do "; + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) { + PrintRawCompoundStmt(CS); + OS << " "; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + Indent(); + } + + OS << "while "; + PrintExpr(Node->getCond()); + OS << ";\n"; +} + +void StmtPrinter::VisitForStmt(ForStmt *Node) { + Indent() << "for ("; + if (Node->getInit()) { + if (DeclStmt *DS = dyn_cast<DeclStmt>(Node->getInit())) + PrintRawDecl(DS->getDecl()); + else + PrintExpr(cast<Expr>(Node->getInit())); + } + OS << ";"; + if (Node->getCond()) { + OS << " "; + PrintExpr(Node->getCond()); + } + OS << ";"; + if (Node->getInc()) { + OS << " "; + PrintExpr(Node->getInc()); + } + OS << ") "; + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) { + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } +} + +void StmtPrinter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *Node) { + Indent() << "for ("; + if (DeclStmt *DS = dyn_cast<DeclStmt>(Node->getElement())) + PrintRawDecl(DS->getDecl()); + else + PrintExpr(cast<Expr>(Node->getElement())); + OS << " in "; + PrintExpr(Node->getCollection()); + OS << ") "; + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) { + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } +} + +void StmtPrinter::VisitGotoStmt(GotoStmt *Node) { + Indent() << "goto " << Node->getLabel()->getName() << ";\n"; +} + +void 
StmtPrinter::VisitIndirectGotoStmt(IndirectGotoStmt *Node) { + Indent() << "goto *"; + PrintExpr(Node->getTarget()); + OS << ";\n"; +} + +void StmtPrinter::VisitContinueStmt(ContinueStmt *Node) { + Indent() << "continue;\n"; +} + +void StmtPrinter::VisitBreakStmt(BreakStmt *Node) { + Indent() << "break;\n"; +} + + +void StmtPrinter::VisitReturnStmt(ReturnStmt *Node) { + Indent() << "return"; + if (Node->getRetValue()) { + OS << " "; + PrintExpr(Node->getRetValue()); + } + OS << ";\n"; +} + + +void StmtPrinter::VisitAsmStmt(AsmStmt *Node) { + Indent() << "asm "; + + if (Node->isVolatile()) + OS << "volatile "; + + OS << "("; + VisitStringLiteral(Node->getAsmString()); + + // Outputs + if (Node->getNumOutputs() != 0 || Node->getNumInputs() != 0 || + Node->getNumClobbers() != 0) + OS << " : "; + + for (unsigned i = 0, e = Node->getNumOutputs(); i != e; ++i) { + if (i != 0) + OS << ", "; + + if (!Node->getOutputName(i).empty()) { + OS << '['; + OS << Node->getOutputName(i); + OS << "] "; + } + + VisitStringLiteral(Node->getOutputConstraint(i)); + OS << " "; + Visit(Node->getOutputExpr(i)); + } + + // Inputs + if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0) + OS << " : "; + + for (unsigned i = 0, e = Node->getNumInputs(); i != e; ++i) { + if (i != 0) + OS << ", "; + + if (!Node->getInputName(i).empty()) { + OS << '['; + OS << Node->getInputName(i); + OS << "] "; + } + + VisitStringLiteral(Node->getInputConstraint(i)); + OS << " "; + Visit(Node->getInputExpr(i)); + } + + // Clobbers + if (Node->getNumClobbers() != 0) + OS << " : "; + + for (unsigned i = 0, e = Node->getNumClobbers(); i != e; ++i) { + if (i != 0) + OS << ", "; + + VisitStringLiteral(Node->getClobber(i)); + } + + OS << ");\n"; +} + +void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) { + Indent() << "@try"; + if (CompoundStmt *TS = dyn_cast<CompoundStmt>(Node->getTryBody())) { + PrintRawCompoundStmt(TS); + OS << "\n"; + } + + for (ObjCAtCatchStmt *catchStmt = + 
static_cast<ObjCAtCatchStmt *>(Node->getCatchStmts()); + catchStmt; + catchStmt = + static_cast<ObjCAtCatchStmt *>(catchStmt->getNextCatchStmt())) { + Indent() << "@catch("; + if (catchStmt->getCatchParamStmt()) { + if (DeclStmt *DS = dyn_cast<DeclStmt>(catchStmt->getCatchParamStmt())) + PrintRawDecl(DS->getDecl()); + } + OS << ")"; + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(catchStmt->getCatchBody())) + { + PrintRawCompoundStmt(CS); + OS << "\n"; + } + } + + if (ObjCAtFinallyStmt *FS =static_cast<ObjCAtFinallyStmt *>( + Node->getFinallyStmt())) { + Indent() << "@finally"; + PrintRawCompoundStmt(dyn_cast<CompoundStmt>(FS->getFinallyBody())); + OS << "\n"; + } +} + +void StmtPrinter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *Node) { +} + +void StmtPrinter::VisitObjCAtCatchStmt (ObjCAtCatchStmt *Node) { + Indent() << "@catch (...) { /* todo */ } \n"; +} + +void StmtPrinter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *Node) { + Indent() << "@throw"; + if (Node->getThrowExpr()) { + OS << " "; + PrintExpr(Node->getThrowExpr()); + } + OS << ";\n"; +} + +void StmtPrinter::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *Node) { + Indent() << "@synchronized ("; + PrintExpr(Node->getSynchExpr()); + OS << ")"; + PrintRawCompoundStmt(Node->getSynchBody()); + OS << "\n"; +} + +//===----------------------------------------------------------------------===// +// Expr printing methods. +//===----------------------------------------------------------------------===// + +void StmtPrinter::VisitExpr(Expr *Node) { + OS << "<<unknown expr type>>"; +} + +void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) { + OS << Node->getDecl()->getName(); +} + +void StmtPrinter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) { + if (Node->getBase()) { + PrintExpr(Node->getBase()); + OS << (Node->isArrow() ? 
"->" : "."); + } + OS << Node->getDecl()->getName(); +} + +void StmtPrinter::VisitPreDefinedExpr(PreDefinedExpr *Node) { + switch (Node->getIdentType()) { + default: + assert(0 && "unknown case"); + case PreDefinedExpr::Func: + OS << "__func__"; + break; + case PreDefinedExpr::Function: + OS << "__FUNCTION__"; + break; + case PreDefinedExpr::PrettyFunction: + OS << "__PRETTY_FUNCTION__"; + break; + } +} + +void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) { + // FIXME should print an L for wchar_t constants + unsigned value = Node->getValue(); + switch (value) { + case '\\': + OS << "'\\\\'"; + break; + case '\'': + OS << "'\\''"; + break; + case '\a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + OS << "'\\a'"; + break; + case '\b': + OS << "'\\b'"; + break; + // Nonstandard escape sequence. + /*case '\e': + OS << "'\\e'"; + break;*/ + case '\f': + OS << "'\\f'"; + break; + case '\n': + OS << "'\\n'"; + break; + case '\r': + OS << "'\\r'"; + break; + case '\t': + OS << "'\\t'"; + break; + case '\v': + OS << "'\\v'"; + break; + default: + if (value < 256 && isprint(value)) { + OS << "'" << (char)value << "'"; + } else if (value < 256) { + OS << "'\\x" << std::hex << value << std::dec << "'"; + } else { + // FIXME what to really do here? + OS << value; + } + } +} + +void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) { + bool isSigned = Node->getType()->isSignedIntegerType(); + OS << Node->getValue().toString(10, isSigned); + + // Emit suffixes. Integer literals are always a builtin integer type. + switch (cast<BuiltinType>(Node->getType().getCanonicalType())->getKind()) { + default: assert(0 && "Unexpected type for integer literal!"); + case BuiltinType::Int: break; // no suffix. 
+ case BuiltinType::UInt: OS << 'U'; break; + case BuiltinType::Long: OS << 'L'; break; + case BuiltinType::ULong: OS << "UL"; break; + case BuiltinType::LongLong: OS << "LL"; break; + case BuiltinType::ULongLong: OS << "ULL"; break; + } +} +void StmtPrinter::VisitFloatingLiteral(FloatingLiteral *Node) { + // FIXME: print value more precisely. + OS << Node->getValueAsDouble(); +} + +void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) { + PrintExpr(Node->getSubExpr()); + OS << "i"; +} + +void StmtPrinter::VisitStringLiteral(StringLiteral *Str) { + if (Str->isWide()) OS << 'L'; + OS << '"'; + + // FIXME: this doesn't print wstrings right. + for (unsigned i = 0, e = Str->getByteLength(); i != e; ++i) { + switch (Str->getStrData()[i]) { + default: OS << Str->getStrData()[i]; break; + // Handle some common ones to make dumps prettier. + case '\\': OS << "\\\\"; break; + case '"': OS << "\\\""; break; + case '\n': OS << "\\n"; break; + case '\t': OS << "\\t"; break; + case '\a': OS << "\\a"; break; + case '\b': OS << "\\b"; break; + } + } + OS << '"'; +} +void StmtPrinter::VisitParenExpr(ParenExpr *Node) { + OS << "("; + PrintExpr(Node->getSubExpr()); + OS << ")"; +} +void StmtPrinter::VisitUnaryOperator(UnaryOperator *Node) { + if (!Node->isPostfix()) { + OS << UnaryOperator::getOpcodeStr(Node->getOpcode()); + + // Print a space if this is an "identifier operator" like sizeof or __real. + switch (Node->getOpcode()) { + default: break; + case UnaryOperator::SizeOf: + case UnaryOperator::AlignOf: + case UnaryOperator::Real: + case UnaryOperator::Imag: + case UnaryOperator::Extension: + OS << ' '; + break; + } + } + PrintExpr(Node->getSubExpr()); + + if (Node->isPostfix()) + OS << UnaryOperator::getOpcodeStr(Node->getOpcode()); +} + +bool StmtPrinter::PrintOffsetOfDesignator(Expr *E) { + if (isa<CompoundLiteralExpr>(E)) { + // Base case, print the type and comma. 
+ OS << E->getType().getAsString() << ", "; + return true; + } else if (ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(E)) { + PrintOffsetOfDesignator(ASE->getLHS()); + OS << "["; + PrintExpr(ASE->getRHS()); + OS << "]"; + return false; + } else { + MemberExpr *ME = cast<MemberExpr>(E); + bool IsFirst = PrintOffsetOfDesignator(ME->getBase()); + OS << (IsFirst ? "" : ".") << ME->getMemberDecl()->getName(); + return false; + } +} + +void StmtPrinter::VisitUnaryOffsetOf(UnaryOperator *Node) { + OS << "__builtin_offsetof("; + PrintOffsetOfDesignator(Node->getSubExpr()); + OS << ")"; +} + +void StmtPrinter::VisitSizeOfAlignOfTypeExpr(SizeOfAlignOfTypeExpr *Node) { + OS << (Node->isSizeOf() ? "sizeof(" : "__alignof("); + OS << Node->getArgumentType().getAsString() << ")"; +} +void StmtPrinter::VisitArraySubscriptExpr(ArraySubscriptExpr *Node) { + PrintExpr(Node->getLHS()); + OS << "["; + PrintExpr(Node->getRHS()); + OS << "]"; +} + +void StmtPrinter::VisitCallExpr(CallExpr *Call) { + PrintExpr(Call->getCallee()); + OS << "("; + for (unsigned i = 0, e = Call->getNumArgs(); i != e; ++i) { + if (i) OS << ", "; + PrintExpr(Call->getArg(i)); + } + OS << ")"; +} +void StmtPrinter::VisitMemberExpr(MemberExpr *Node) { + PrintExpr(Node->getBase()); + OS << (Node->isArrow() ? 
"->" : "."); + + FieldDecl *Field = Node->getMemberDecl(); + assert(Field && "MemberExpr should alway reference a field!"); + OS << Field->getName(); +} +void StmtPrinter::VisitOCUVectorElementExpr(OCUVectorElementExpr *Node) { + PrintExpr(Node->getBase()); + OS << "."; + OS << Node->getAccessor().getName(); +} +void StmtPrinter::VisitCastExpr(CastExpr *Node) { + OS << "(" << Node->getType().getAsString() << ")"; + PrintExpr(Node->getSubExpr()); +} +void StmtPrinter::VisitCompoundLiteralExpr(CompoundLiteralExpr *Node) { + OS << "(" << Node->getType().getAsString() << ")"; + PrintExpr(Node->getInitializer()); +} +void StmtPrinter::VisitImplicitCastExpr(ImplicitCastExpr *Node) { + // No need to print anything, simply forward to the sub expression. + PrintExpr(Node->getSubExpr()); +} +void StmtPrinter::VisitBinaryOperator(BinaryOperator *Node) { + PrintExpr(Node->getLHS()); + OS << " " << BinaryOperator::getOpcodeStr(Node->getOpcode()) << " "; + PrintExpr(Node->getRHS()); +} +void StmtPrinter::VisitCompoundAssignOperator(CompoundAssignOperator *Node) { + PrintExpr(Node->getLHS()); + OS << " " << BinaryOperator::getOpcodeStr(Node->getOpcode()) << " "; + PrintExpr(Node->getRHS()); +} +void StmtPrinter::VisitConditionalOperator(ConditionalOperator *Node) { + PrintExpr(Node->getCond()); + + if (Node->getLHS()) { + OS << " ? "; + PrintExpr(Node->getLHS()); + OS << " : "; + } + else { // Handle GCC extention where LHS can be NULL. + OS << " ?: "; + } + + PrintExpr(Node->getRHS()); +} + +// GNU extensions. 
+ +void StmtPrinter::VisitAddrLabelExpr(AddrLabelExpr *Node) { + OS << "&&" << Node->getLabel()->getName(); +} + +void StmtPrinter::VisitStmtExpr(StmtExpr *E) { + OS << "("; + PrintRawCompoundStmt(E->getSubStmt()); + OS << ")"; +} + +void StmtPrinter::VisitTypesCompatibleExpr(TypesCompatibleExpr *Node) { + OS << "__builtin_types_compatible_p("; + OS << Node->getArgType1().getAsString() << ","; + OS << Node->getArgType2().getAsString() << ")"; +} + +void StmtPrinter::VisitChooseExpr(ChooseExpr *Node) { + OS << "__builtin_choose_expr("; + PrintExpr(Node->getCond()); + OS << ", "; + PrintExpr(Node->getLHS()); + OS << ", "; + PrintExpr(Node->getRHS()); + OS << ")"; +} + +void StmtPrinter::VisitOverloadExpr(OverloadExpr *Node) { + OS << "__builtin_overload("; + for (unsigned i = 0, e = Node->getNumSubExprs(); i != e; ++i) { + if (i) OS << ", "; + PrintExpr(Node->getExpr(i)); + } + OS << ")"; +} + +void StmtPrinter::VisitInitListExpr(InitListExpr* Node) { + OS << "{ "; + for (unsigned i = 0, e = Node->getNumInits(); i != e; ++i) { + if (i) OS << ", "; + PrintExpr(Node->getInit(i)); + } + OS << " }"; +} + +void StmtPrinter::VisitVAArgExpr(VAArgExpr *Node) { + OS << "va_arg("; + PrintExpr(Node->getSubExpr()); + OS << ", "; + OS << Node->getType().getAsString(); + OS << ")"; +} + +// C++ + +void StmtPrinter::VisitCXXCastExpr(CXXCastExpr *Node) { + OS << CXXCastExpr::getOpcodeStr(Node->getOpcode()) << '<'; + OS << Node->getDestType().getAsString() << ">("; + PrintExpr(Node->getSubExpr()); + OS << ")"; +} + +void StmtPrinter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *Node) { + OS << (Node->getValue() ? 
"true" : "false"); +} + +void StmtPrinter::VisitCXXThrowExpr(CXXThrowExpr *Node) { + if (Node->getSubExpr() == 0) + OS << "throw"; + else { + OS << "throw "; + PrintExpr(Node->getSubExpr()); + } +} + +// Obj-C + +void StmtPrinter::VisitObjCStringLiteral(ObjCStringLiteral *Node) { + OS << "@"; + VisitStringLiteral(Node->getString()); +} + +void StmtPrinter::VisitObjCEncodeExpr(ObjCEncodeExpr *Node) { + OS << "@encode(" << Node->getEncodedType().getAsString() << ")"; +} + +void StmtPrinter::VisitObjCSelectorExpr(ObjCSelectorExpr *Node) { + OS << "@selector(" << Node->getSelector().getName() << ")"; +} + +void StmtPrinter::VisitObjCProtocolExpr(ObjCProtocolExpr *Node) { + OS << "@protocol(" << Node->getProtocol()->getName() << ")"; +} + +void StmtPrinter::VisitObjCMessageExpr(ObjCMessageExpr *Mess) { + OS << "["; + Expr *receiver = Mess->getReceiver(); + if (receiver) PrintExpr(receiver); + else OS << Mess->getClassName()->getName(); + Selector &selector = Mess->getSelector(); + if (selector.isUnarySelector()) { + OS << " " << selector.getIdentifierInfoForSlot(0)->getName(); + } else { + for (unsigned i = 0, e = Mess->getNumArgs(); i != e; ++i) { + if (selector.getIdentifierInfoForSlot(i)) + OS << selector.getIdentifierInfoForSlot(i)->getName() << ":"; + else + OS << ":"; + PrintExpr(Mess->getArg(i)); + } + } + OS << "]"; +} + +//===----------------------------------------------------------------------===// +// Stmt method implementations +//===----------------------------------------------------------------------===// + +void Stmt::dumpPretty() const { + printPretty(*llvm::cerr.stream()); +} + +void Stmt::printPretty(std::ostream &OS, PrinterHelper* Helper) const { + if (this == 0) { + OS << "<NULL>"; + return; + } + + StmtPrinter P(OS, Helper); + P.Visit(const_cast<Stmt*>(this)); +} + +//===----------------------------------------------------------------------===// +// PrinterHelper +//===----------------------------------------------------------------------===// + 
+// Implement virtual destructor. +PrinterHelper::~PrinterHelper() {} diff --git a/clang/lib/AST/StmtSerialization.cpp b/clang/lib/AST/StmtSerialization.cpp new file mode 100644 index 00000000000..433e8e27026 --- /dev/null +++ b/clang/lib/AST/StmtSerialization.cpp @@ -0,0 +1,1001 @@ +//===--- StmtSerialization.cpp - Serialization of Statements --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the type-specific methods for serializing statements +// and expressions. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Expr.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; +using llvm::Serializer; +using llvm::Deserializer; + +void Stmt::Emit(Serializer& S) const { + S.FlushRecord(); + S.EmitInt(getStmtClass()); + EmitImpl(S); + S.FlushRecord(); +} + +Stmt* Stmt::Create(Deserializer& D) { + StmtClass SC = static_cast<StmtClass>(D.ReadInt()); + + switch (SC) { + default: + assert (false && "Not implemented."); + return NULL; + + case AddrLabelExprClass: + return AddrLabelExpr::CreateImpl(D); + + case ArraySubscriptExprClass: + return ArraySubscriptExpr::CreateImpl(D); + + case AsmStmtClass: + return AsmStmt::CreateImpl(D); + + case BinaryOperatorClass: + return BinaryOperator::CreateImpl(D); + + case BreakStmtClass: + return BreakStmt::CreateImpl(D); + + case CallExprClass: + return CallExpr::CreateImpl(D); + + case CaseStmtClass: + return CaseStmt::CreateImpl(D); + + case CastExprClass: + return CastExpr::CreateImpl(D); + + case CharacterLiteralClass: + return CharacterLiteral::CreateImpl(D); + + case CompoundAssignOperatorClass: + return CompoundAssignOperator::CreateImpl(D); + + case CompoundLiteralExprClass: + return 
CompoundLiteralExpr::CreateImpl(D); + + case CompoundStmtClass: + return CompoundStmt::CreateImpl(D); + + case ConditionalOperatorClass: + return ConditionalOperator::CreateImpl(D); + + case ContinueStmtClass: + return ContinueStmt::CreateImpl(D); + + case DeclRefExprClass: + return DeclRefExpr::CreateImpl(D); + + case DeclStmtClass: + return DeclStmt::CreateImpl(D); + + case DefaultStmtClass: + return DefaultStmt::CreateImpl(D); + + case DoStmtClass: + return DoStmt::CreateImpl(D); + + case FloatingLiteralClass: + return FloatingLiteral::CreateImpl(D); + + case ForStmtClass: + return ForStmt::CreateImpl(D); + + case GotoStmtClass: + return GotoStmt::CreateImpl(D); + + case IfStmtClass: + return IfStmt::CreateImpl(D); + + case ImaginaryLiteralClass: + return ImaginaryLiteral::CreateImpl(D); + + case ImplicitCastExprClass: + return ImplicitCastExpr::CreateImpl(D); + + case IndirectGotoStmtClass: + return IndirectGotoStmt::CreateImpl(D); + + case InitListExprClass: + return InitListExpr::CreateImpl(D); + + case IntegerLiteralClass: + return IntegerLiteral::CreateImpl(D); + + case LabelStmtClass: + return LabelStmt::CreateImpl(D); + + case MemberExprClass: + return MemberExpr::CreateImpl(D); + + case NullStmtClass: + return NullStmt::CreateImpl(D); + + case ParenExprClass: + return ParenExpr::CreateImpl(D); + + case PreDefinedExprClass: + return PreDefinedExpr::CreateImpl(D); + + case ReturnStmtClass: + return ReturnStmt::CreateImpl(D); + + case SizeOfAlignOfTypeExprClass: + return SizeOfAlignOfTypeExpr::CreateImpl(D); + + case StmtExprClass: + return StmtExpr::CreateImpl(D); + + case StringLiteralClass: + return StringLiteral::CreateImpl(D); + + case SwitchStmtClass: + return SwitchStmt::CreateImpl(D); + + case UnaryOperatorClass: + return UnaryOperator::CreateImpl(D); + + case WhileStmtClass: + return WhileStmt::CreateImpl(D); + + //==--------------------------------------==// + // Objective C + //==--------------------------------------==// + + case 
ObjCAtCatchStmtClass: + return ObjCAtCatchStmt::CreateImpl(D); + + case ObjCAtFinallyStmtClass: + return ObjCAtFinallyStmt::CreateImpl(D); + + case ObjCAtSynchronizedStmtClass: + return ObjCAtSynchronizedStmt::CreateImpl(D); + + case ObjCAtThrowStmtClass: + return ObjCAtThrowStmt::CreateImpl(D); + + case ObjCAtTryStmtClass: + return ObjCAtTryStmt::CreateImpl(D); + + case ObjCEncodeExprClass: + return ObjCEncodeExpr::CreateImpl(D); + + case ObjCForCollectionStmtClass: + return ObjCForCollectionStmt::CreateImpl(D); + + case ObjCIvarRefExprClass: + return ObjCIvarRefExpr::CreateImpl(D); + + case ObjCSelectorExprClass: + return ObjCSelectorExpr::CreateImpl(D); + + case ObjCStringLiteralClass: + return ObjCStringLiteral::CreateImpl(D); + } +} + +//===----------------------------------------------------------------------===// +// C Serialization +//===----------------------------------------------------------------------===// + +void AddrLabelExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(AmpAmpLoc); + S.Emit(LabelLoc); + S.EmitPtr(Label); +} + +AddrLabelExpr* AddrLabelExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation AALoc = SourceLocation::ReadVal(D); + SourceLocation LLoc = SourceLocation::ReadVal(D); + AddrLabelExpr* expr = new AddrLabelExpr(AALoc,LLoc,NULL,t); + D.ReadPtr(expr->Label); // Pointer may be backpatched. 
+ return expr; +} + +void ArraySubscriptExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(RBracketLoc); + S.BatchEmitOwnedPtrs(getLHS(),getRHS()); +} + +ArraySubscriptExpr* ArraySubscriptExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + Expr *LHS, *RHS; + D.BatchReadOwnedPtrs(LHS,RHS); + return new ArraySubscriptExpr(LHS,RHS,t,L); +} + +void AsmStmt::EmitImpl(Serializer& S) const { + S.Emit(AsmLoc); + + getAsmString()->EmitImpl(S); + S.Emit(RParenLoc); + + S.EmitBool(IsVolatile); + S.EmitBool(IsSimple); + S.EmitInt(NumOutputs); + S.EmitInt(NumInputs); + + unsigned size = NumOutputs + NumInputs; + + for (unsigned i = 0; i < size; ++i) + S.EmitCStr(Names[i].c_str()); + + for (unsigned i = 0; i < size; ++i) + Constraints[i]->EmitImpl(S); + + for (unsigned i = 0; i < size; ++i) + S.EmitOwnedPtr(Exprs[i]); + + S.EmitInt(Clobbers.size()); + for (unsigned i = 0, e = Clobbers.size(); i != e; ++i) + Clobbers[i]->EmitImpl(S); +} + +AsmStmt* AsmStmt::CreateImpl(Deserializer& D) { + SourceLocation ALoc = SourceLocation::ReadVal(D); + StringLiteral *AsmStr = StringLiteral::CreateImpl(D); + SourceLocation PLoc = SourceLocation::ReadVal(D); + + bool IsVolatile = D.ReadBool(); + bool IsSimple = D.ReadBool(); + AsmStmt *Stmt = new AsmStmt(ALoc, IsSimple, IsVolatile, 0, 0, 0, 0, 0, + AsmStr, + 0, 0, PLoc); + + Stmt->NumOutputs = D.ReadInt(); + Stmt->NumInputs = D.ReadInt(); + + unsigned size = Stmt->NumOutputs + Stmt->NumInputs; + + Stmt->Names.reserve(size); + for (unsigned i = 0; i < size; ++i) { + std::vector<char> data; + D.ReadCStr(data, false); + + Stmt->Names.push_back(std::string(data.begin(), data.end())); + } + + Stmt->Constraints.reserve(size); + for (unsigned i = 0; i < size; ++i) + Stmt->Constraints.push_back(StringLiteral::CreateImpl(D)); + + Stmt->Exprs.reserve(size); + for (unsigned i = 0; i < size; ++i) + Stmt->Exprs.push_back(D.ReadOwnedPtr<Expr>()); + + unsigned NumClobbers 
= D.ReadInt(); + Stmt->Clobbers.reserve(NumClobbers); + for (unsigned i = 0; i < NumClobbers; ++i) + Stmt->Clobbers.push_back(StringLiteral::CreateImpl(D)); + + return Stmt; +} + +void BinaryOperator::EmitImpl(Serializer& S) const { + S.EmitInt(Opc); + S.Emit(OpLoc);; + S.Emit(getType()); + S.BatchEmitOwnedPtrs(getLHS(),getRHS()); +} + +BinaryOperator* BinaryOperator::CreateImpl(Deserializer& D) { + Opcode Opc = static_cast<Opcode>(D.ReadInt()); + SourceLocation OpLoc = SourceLocation::ReadVal(D); + QualType Result = QualType::ReadVal(D); + Expr *LHS, *RHS; + D.BatchReadOwnedPtrs(LHS,RHS); + + return new BinaryOperator(LHS,RHS,Opc,Result,OpLoc); +} + +void BreakStmt::EmitImpl(Serializer& S) const { + S.Emit(BreakLoc); +} + +BreakStmt* BreakStmt::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + return new BreakStmt(Loc); +} + +void CallExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(RParenLoc); + S.EmitInt(NumArgs); + S.BatchEmitOwnedPtrs(NumArgs+1,SubExprs); +} + +CallExpr* CallExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + unsigned NumArgs = D.ReadInt(); + Expr** SubExprs = new Expr*[NumArgs+1]; + D.BatchReadOwnedPtrs(NumArgs+1,SubExprs); + + return new CallExpr(SubExprs,NumArgs,t,L); +} + +void CaseStmt::EmitImpl(Serializer& S) const { + S.Emit(CaseLoc); + S.EmitPtr(getNextSwitchCase()); + S.BatchEmitOwnedPtrs((unsigned) END_EXPR,&SubExprs[0]); +} + +CaseStmt* CaseStmt::CreateImpl(Deserializer& D) { + SourceLocation CaseLoc = SourceLocation::ReadVal(D); + CaseStmt* stmt = new CaseStmt(NULL,NULL,NULL,CaseLoc); + D.ReadPtr(stmt->NextSwitchCase); + D.BatchReadOwnedPtrs((unsigned) END_EXPR,&stmt->SubExprs[0]); + return stmt; +} + +void CastExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(Loc); + S.EmitOwnedPtr(Op); +} + +CastExpr* CastExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + 
SourceLocation Loc = SourceLocation::ReadVal(D); + Expr* Op = D.ReadOwnedPtr<Expr>(); + return new CastExpr(t,Op,Loc); +} + + +void CharacterLiteral::EmitImpl(Serializer& S) const { + S.Emit(Value); + S.Emit(Loc); + S.Emit(getType()); +} + +CharacterLiteral* CharacterLiteral::CreateImpl(Deserializer& D) { + unsigned value = D.ReadInt(); + SourceLocation Loc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + return new CharacterLiteral(value,T,Loc); +} + +void CompoundAssignOperator::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(ComputationType); + S.Emit(getOperatorLoc()); + S.EmitInt(getOpcode()); + S.BatchEmitOwnedPtrs(getLHS(),getRHS()); +} + +CompoundAssignOperator* +CompoundAssignOperator::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + QualType c = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + Opcode Opc = static_cast<Opcode>(D.ReadInt()); + Expr* LHS, *RHS; + D.BatchReadOwnedPtrs(LHS,RHS); + + return new CompoundAssignOperator(LHS,RHS,Opc,t,c,L); +} + +void CompoundLiteralExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(getLParenLoc()); + S.EmitBool(isFileScope()); + S.EmitOwnedPtr(Init); +} + +CompoundLiteralExpr* CompoundLiteralExpr::CreateImpl(Deserializer& D) { + QualType Q = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + bool fileScope = D.ReadBool(); + Expr* Init = D.ReadOwnedPtr<Expr>(); + return new CompoundLiteralExpr(L, Q, Init, fileScope); +} + +void CompoundStmt::EmitImpl(Serializer& S) const { + S.Emit(LBracLoc); + S.Emit(RBracLoc); + S.Emit(Body.size()); + + for (const_body_iterator I=body_begin(), E=body_end(); I!=E; ++I) + S.EmitOwnedPtr(*I); +} + +CompoundStmt* CompoundStmt::CreateImpl(Deserializer& D) { + SourceLocation LB = SourceLocation::ReadVal(D); + SourceLocation RB = SourceLocation::ReadVal(D); + unsigned size = D.ReadInt(); + + CompoundStmt* stmt = new CompoundStmt(NULL,0,LB,RB); + + 
stmt->Body.reserve(size); + + for (unsigned i = 0; i < size; ++i) + stmt->Body.push_back(D.ReadOwnedPtr<Stmt>()); + + return stmt; +} + +void ConditionalOperator::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.BatchEmitOwnedPtrs((unsigned) END_EXPR, SubExprs); +} + +ConditionalOperator* ConditionalOperator::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + ConditionalOperator* c = new ConditionalOperator(NULL,NULL,NULL,t); + D.BatchReadOwnedPtrs((unsigned) END_EXPR, c->SubExprs); + return c; +} + +void ContinueStmt::EmitImpl(Serializer& S) const { + S.Emit(ContinueLoc); +} + +ContinueStmt* ContinueStmt::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + return new ContinueStmt(Loc); +} + +void DeclStmt::EmitImpl(Serializer& S) const { + // FIXME: special handling for struct decls. + S.EmitOwnedPtr(getDecl()); + S.Emit(StartLoc); + S.Emit(EndLoc); +} + +void DeclRefExpr::EmitImpl(Serializer& S) const { + S.Emit(Loc); + S.Emit(getType()); + + // Some DeclRefExprs can actually hold the owning reference to a FunctionDecl. + // This occurs when an implicitly defined function is called, and + // the decl does not appear in the source file. We thus check if the + // decl pointer has been registered, and if not, emit an owned pointer. + + // FIXME: While this will work for serialization, it won't work for + // memory management. The only reason this works for serialization is + // because we are tracking all serialized pointers. Either DeclRefExpr + // needs an explicit bit indicating that it owns the the object, + // or we need a different ownership model. + + const Decl* d = getDecl(); + + if (!S.isRegistered(d)) { + assert (isa<FunctionDecl>(d) + && "DeclRefExpr can only own FunctionDecls for implicitly def. 
funcs."); + + S.EmitBool(true); + S.EmitOwnedPtr(d); + } + else { + S.EmitBool(false); + S.EmitPtr(d); + } +} + +DeclRefExpr* DeclRefExpr::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + bool OwnsDecl = D.ReadBool(); + ValueDecl* decl; + + if (!OwnsDecl) + D.ReadPtr(decl,false); // No backpatching. + else + decl = cast<ValueDecl>(D.ReadOwnedPtr<Decl>()); + + return new DeclRefExpr(decl,T,Loc); +} + + +DeclStmt* DeclStmt::CreateImpl(Deserializer& D) { + ScopedDecl* decl = cast<ScopedDecl>(D.ReadOwnedPtr<Decl>()); + SourceLocation StartLoc = SourceLocation::ReadVal(D); + SourceLocation EndLoc = SourceLocation::ReadVal(D); + return new DeclStmt(decl, StartLoc, EndLoc); +} + +void DefaultStmt::EmitImpl(Serializer& S) const { + S.Emit(DefaultLoc); + S.EmitOwnedPtr(getSubStmt()); + S.EmitPtr(getNextSwitchCase()); +} + +DefaultStmt* DefaultStmt::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + Stmt* SubStmt = D.ReadOwnedPtr<Stmt>(); + + DefaultStmt* stmt = new DefaultStmt(Loc,SubStmt); + stmt->setNextSwitchCase(D.ReadPtr<SwitchCase>()); + + return stmt; +} + +void DoStmt::EmitImpl(Serializer& S) const { + S.Emit(DoLoc); + S.EmitOwnedPtr(getCond()); + S.EmitOwnedPtr(getBody()); +} + +DoStmt* DoStmt::CreateImpl(Deserializer& D) { + SourceLocation DoLoc = SourceLocation::ReadVal(D); + Expr* Cond = D.ReadOwnedPtr<Expr>(); + Stmt* Body = D.ReadOwnedPtr<Stmt>(); + return new DoStmt(Body,Cond,DoLoc); +} + +void FloatingLiteral::EmitImpl(Serializer& S) const { + S.Emit(Loc); + S.Emit(getType()); + S.EmitBool(isExact()); + S.Emit(Value); +} + +FloatingLiteral* FloatingLiteral::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + QualType t = QualType::ReadVal(D); + bool isExact = D.ReadBool(); + llvm::APFloat Val = llvm::APFloat::ReadVal(D); + FloatingLiteral* expr = new FloatingLiteral(Val,&isExact,t,Loc); + return expr; +} + +void 
ForStmt::EmitImpl(Serializer& S) const { + S.Emit(ForLoc); + S.EmitOwnedPtr(getInit()); + S.EmitOwnedPtr(getCond()); + S.EmitOwnedPtr(getInc()); + S.EmitOwnedPtr(getBody()); +} + +ForStmt* ForStmt::CreateImpl(Deserializer& D) { + SourceLocation ForLoc = SourceLocation::ReadVal(D); + Stmt* Init = D.ReadOwnedPtr<Stmt>(); + Expr* Cond = D.ReadOwnedPtr<Expr>(); + Expr* Inc = D.ReadOwnedPtr<Expr>(); + Stmt* Body = D.ReadOwnedPtr<Stmt>(); + return new ForStmt(Init,Cond,Inc,Body,ForLoc); +} + +void GotoStmt::EmitImpl(Serializer& S) const { + S.Emit(GotoLoc); + S.Emit(LabelLoc); + S.EmitPtr(Label); +} + +GotoStmt* GotoStmt::CreateImpl(Deserializer& D) { + SourceLocation GotoLoc = SourceLocation::ReadVal(D); + SourceLocation LabelLoc = SourceLocation::ReadVal(D); + GotoStmt* stmt = new GotoStmt(NULL,GotoLoc,LabelLoc); + D.ReadPtr(stmt->Label); // This pointer may be backpatched later. + return stmt; +} + +void IfStmt::EmitImpl(Serializer& S) const { + S.Emit(IfLoc); + S.EmitOwnedPtr(getCond()); + S.EmitOwnedPtr(getThen()); + S.EmitOwnedPtr(getElse()); +} + +IfStmt* IfStmt::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + Expr* Cond = D.ReadOwnedPtr<Expr>(); + Stmt* Then = D.ReadOwnedPtr<Stmt>(); + Stmt* Else = D.ReadOwnedPtr<Stmt>(); + return new IfStmt(L,Cond,Then,Else); +} + +void ImaginaryLiteral::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.EmitOwnedPtr(Val); +} + +ImaginaryLiteral* ImaginaryLiteral::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + Expr* expr = D.ReadOwnedPtr<Expr>(); + assert (isa<FloatingLiteral>(expr) || isa<IntegerLiteral>(expr)); + return new ImaginaryLiteral(expr,t); +} + +void ImplicitCastExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.EmitOwnedPtr(Op); +} + +ImplicitCastExpr* ImplicitCastExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + Expr* Op = D.ReadOwnedPtr<Expr>(); + return new ImplicitCastExpr(t,Op); +} + +void 
IndirectGotoStmt::EmitImpl(Serializer& S) const { + S.EmitOwnedPtr(Target); +} + +IndirectGotoStmt* IndirectGotoStmt::CreateImpl(Deserializer& D) { + Expr* Target = D.ReadOwnedPtr<Expr>(); + return new IndirectGotoStmt(Target); +} + +void InitListExpr::EmitImpl(Serializer& S) const { + S.Emit(LBraceLoc); + S.Emit(RBraceLoc); + S.EmitInt(NumInits); + S.BatchEmitOwnedPtrs(NumInits,InitExprs); +} + +InitListExpr* InitListExpr::CreateImpl(Deserializer& D) { + InitListExpr* expr = new InitListExpr(); + expr->LBraceLoc = SourceLocation::ReadVal(D); + expr->RBraceLoc = SourceLocation::ReadVal(D); + expr->NumInits = D.ReadInt(); + assert(expr->NumInits); + expr->InitExprs = new Expr*[expr->NumInits]; + D.BatchReadOwnedPtrs(expr->NumInits,expr->InitExprs); + return expr; +} + +void IntegerLiteral::EmitImpl(Serializer& S) const { + S.Emit(Loc); + S.Emit(getType()); + S.Emit(getValue()); +} + +IntegerLiteral* IntegerLiteral::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + + // Create a dummy APInt because it is more efficient to deserialize + // it in place with the deserialized IntegerLiteral. 
(fewer copies) + llvm::APInt temp; + IntegerLiteral* expr = new IntegerLiteral(temp,T,Loc); + D.Read(expr->Value); + + return expr; +} + +void LabelStmt::EmitImpl(Serializer& S) const { + S.EmitPtr(Label); + S.Emit(IdentLoc); + S.EmitOwnedPtr(SubStmt); +} + +LabelStmt* LabelStmt::CreateImpl(Deserializer& D) { + IdentifierInfo* Label = D.ReadPtr<IdentifierInfo>(); + SourceLocation IdentLoc = SourceLocation::ReadVal(D); + Stmt* SubStmt = D.ReadOwnedPtr<Stmt>(); + return new LabelStmt(IdentLoc,Label,SubStmt); +} + +void MemberExpr::EmitImpl(Serializer& S) const { + S.Emit(MemberLoc); + S.EmitPtr(MemberDecl); + S.EmitBool(IsArrow); + S.Emit(getType()); + S.EmitOwnedPtr(Base); +} + +MemberExpr* MemberExpr::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + FieldDecl* MemberDecl = cast<FieldDecl>(D.ReadPtr<Decl>()); + bool IsArrow = D.ReadBool(); + QualType T = QualType::ReadVal(D); + Expr* base = D.ReadOwnedPtr<Expr>(); + + return new MemberExpr(base,IsArrow,MemberDecl,L,T); +} + +void NullStmt::EmitImpl(Serializer& S) const { + S.Emit(SemiLoc); +} + +NullStmt* NullStmt::CreateImpl(Deserializer& D) { + SourceLocation SemiLoc = SourceLocation::ReadVal(D); + return new NullStmt(SemiLoc); +} + +void ParenExpr::EmitImpl(Serializer& S) const { + S.Emit(L); + S.Emit(R); + S.EmitOwnedPtr(Val); +} + +ParenExpr* ParenExpr::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + SourceLocation R = SourceLocation::ReadVal(D); + Expr* val = D.ReadOwnedPtr<Expr>(); + return new ParenExpr(L,R,val); +} + +void PreDefinedExpr::EmitImpl(Serializer& S) const { + S.Emit(Loc); + S.EmitInt(getIdentType()); + S.Emit(getType()); +} + +PreDefinedExpr* PreDefinedExpr::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + IdentType it = static_cast<IdentType>(D.ReadInt()); + QualType Q = QualType::ReadVal(D); + return new PreDefinedExpr(Loc,Q,it); +} + +void ReturnStmt::EmitImpl(Serializer& S) const { + 
S.Emit(RetLoc); + S.EmitOwnedPtr(RetExpr); +} + +ReturnStmt* ReturnStmt::CreateImpl(Deserializer& D) { + SourceLocation RetLoc = SourceLocation::ReadVal(D); + Expr* RetExpr = D.ReadOwnedPtr<Expr>(); + return new ReturnStmt(RetLoc,RetExpr); +} + +void SizeOfAlignOfTypeExpr::EmitImpl(Serializer& S) const { + S.EmitBool(isSizeof); + S.Emit(Ty); + S.Emit(getType()); + S.Emit(OpLoc); + S.Emit(RParenLoc); +} + +SizeOfAlignOfTypeExpr* SizeOfAlignOfTypeExpr::CreateImpl(Deserializer& D) { + bool isSizeof = D.ReadBool(); + QualType Ty = QualType::ReadVal(D); + QualType Res = QualType::ReadVal(D); + SourceLocation OpLoc = SourceLocation::ReadVal(D); + SourceLocation RParenLoc = SourceLocation::ReadVal(D); + + return new SizeOfAlignOfTypeExpr(isSizeof,Ty,Res,OpLoc,RParenLoc); +} + +void StmtExpr::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(LParenLoc); + S.Emit(RParenLoc); + S.EmitOwnedPtr(SubStmt); +} + +StmtExpr* StmtExpr::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + SourceLocation R = SourceLocation::ReadVal(D); + CompoundStmt* SubStmt = cast<CompoundStmt>(D.ReadOwnedPtr<Stmt>()); + return new StmtExpr(SubStmt,t,L,R); +} + +void StringLiteral::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(firstTokLoc); + S.Emit(lastTokLoc); + S.EmitBool(isWide()); + S.Emit(getByteLength()); + + for (unsigned i = 0 ; i < ByteLength; ++i) + S.EmitInt(StrData[i]); +} + +StringLiteral* StringLiteral::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation firstTokLoc = SourceLocation::ReadVal(D); + SourceLocation lastTokLoc = SourceLocation::ReadVal(D); + bool isWide = D.ReadBool(); + unsigned ByteLength = D.ReadInt(); + + StringLiteral* sl = new StringLiteral(NULL,0,isWide,t,firstTokLoc,lastTokLoc); + + char* StrData = new char[ByteLength]; + for (unsigned i = 0; i < ByteLength; ++i) + StrData[i] = (char) D.ReadInt(); + + sl->ByteLength = ByteLength; + 
sl->StrData = StrData; + + return sl; +} + +void SwitchStmt::EmitImpl(Serializer& S) const { + S.Emit(SwitchLoc); + S.EmitOwnedPtr(getCond()); + S.EmitOwnedPtr(getBody()); + S.EmitPtr(FirstCase); +} + +SwitchStmt* SwitchStmt::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + Stmt* Cond = D.ReadOwnedPtr<Stmt>(); + Stmt* Body = D.ReadOwnedPtr<Stmt>(); + SwitchCase* FirstCase = cast<SwitchCase>(D.ReadPtr<Stmt>()); + + SwitchStmt* stmt = new SwitchStmt(cast<Expr>(Cond)); + stmt->setBody(Body,Loc); + stmt->FirstCase = FirstCase; + + return stmt; +} + +void UnaryOperator::EmitImpl(Serializer& S) const { + S.Emit(getType()); + S.Emit(Loc); + S.EmitInt(Opc); + S.EmitOwnedPtr(Val); +} + +UnaryOperator* UnaryOperator::CreateImpl(Deserializer& D) { + QualType t = QualType::ReadVal(D); + SourceLocation L = SourceLocation::ReadVal(D); + Opcode Opc = static_cast<Opcode>(D.ReadInt()); + Expr* Val = D.ReadOwnedPtr<Expr>(); + return new UnaryOperator(Val,Opc,t,L); +} + +void WhileStmt::EmitImpl(Serializer& S) const { + S.Emit(WhileLoc); + S.EmitOwnedPtr(getCond()); + S.EmitOwnedPtr(getBody()); +} + +WhileStmt* WhileStmt::CreateImpl(Deserializer& D) { + SourceLocation WhileLoc = SourceLocation::ReadVal(D); + Expr* Cond = D.ReadOwnedPtr<Expr>(); + Stmt* Body = D.ReadOwnedPtr<Stmt>(); + return new WhileStmt(Cond,Body,WhileLoc); +} + +//===----------------------------------------------------------------------===// +// Objective C Serialization +//===----------------------------------------------------------------------===// + +void ObjCAtCatchStmt::EmitImpl(Serializer& S) const { + S.Emit(AtCatchLoc); + S.Emit(RParenLoc); + S.BatchEmitOwnedPtrs((unsigned) END_EXPR, &SubExprs[0]); +} + +ObjCAtCatchStmt* ObjCAtCatchStmt::CreateImpl(Deserializer& D) { + SourceLocation AtCatchLoc = SourceLocation::ReadVal(D); + SourceLocation RParenLoc = SourceLocation::ReadVal(D); + + ObjCAtCatchStmt* stmt = new ObjCAtCatchStmt(AtCatchLoc,RParenLoc); + 
D.BatchReadOwnedPtrs((unsigned) END_EXPR, &stmt->SubExprs[0]); + + return stmt; +} + +void ObjCAtFinallyStmt::EmitImpl(Serializer& S) const { + S.Emit(AtFinallyLoc); + S.EmitOwnedPtr(AtFinallyStmt); +} + +ObjCAtFinallyStmt* ObjCAtFinallyStmt::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + Stmt* AtFinallyStmt = D.ReadOwnedPtr<Stmt>(); + return new ObjCAtFinallyStmt(Loc,AtFinallyStmt); +} + +void ObjCAtSynchronizedStmt::EmitImpl(Serializer& S) const { + S.Emit(AtSynchronizedLoc); + S.BatchEmitOwnedPtrs((unsigned) END_EXPR,&SubStmts[0]); + } + +ObjCAtSynchronizedStmt* ObjCAtSynchronizedStmt::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + ObjCAtSynchronizedStmt* stmt = new ObjCAtSynchronizedStmt(L,0,0); + D.BatchReadOwnedPtrs((unsigned) END_EXPR, &stmt->SubStmts[0]); + return stmt; +} + +void ObjCAtThrowStmt::EmitImpl(Serializer& S) const { + S.Emit(AtThrowLoc); + S.EmitOwnedPtr(Throw); +} + +ObjCAtThrowStmt* ObjCAtThrowStmt::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + Stmt* Throw = D.ReadOwnedPtr<Stmt>(); + return new ObjCAtThrowStmt(L,Throw); +} + +void ObjCAtTryStmt::EmitImpl(Serializer& S) const { + S.Emit(AtTryLoc); + S.BatchEmitOwnedPtrs((unsigned) END_EXPR, &SubStmts[0]); +} + +ObjCAtTryStmt* ObjCAtTryStmt::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + ObjCAtTryStmt* stmt = new ObjCAtTryStmt(L,NULL,NULL,NULL); + D.BatchReadOwnedPtrs((unsigned) END_EXPR, &stmt->SubStmts[0]); + return stmt; +} + +void ObjCEncodeExpr::EmitImpl(Serializer& S) const { + S.Emit(AtLoc); + S.Emit(RParenLoc); + S.Emit(getType()); + S.Emit(EncType); +} + +ObjCEncodeExpr* ObjCEncodeExpr::CreateImpl(Deserializer& D) { + SourceLocation AtLoc = SourceLocation::ReadVal(D); + SourceLocation RParenLoc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + QualType ET = QualType::ReadVal(D); + return new ObjCEncodeExpr(T,ET,AtLoc,RParenLoc); 
+} + +void ObjCForCollectionStmt::EmitImpl(Serializer& S) const { + S.Emit(ForLoc); + S.Emit(RParenLoc); + S.BatchEmitOwnedPtrs(getElement(),getCollection(),getBody()); +} + +ObjCForCollectionStmt* ObjCForCollectionStmt::CreateImpl(Deserializer& D) { + SourceLocation ForLoc = SourceLocation::ReadVal(D); + SourceLocation RParenLoc = SourceLocation::ReadVal(D); + Stmt* Element; + Expr* Collection; + Stmt* Body; + D.BatchReadOwnedPtrs(Element,Collection,Body); + return new ObjCForCollectionStmt(Element,Collection,Body,ForLoc, RParenLoc); +} + +void ObjCIvarRefExpr::EmitImpl(Serializer& S) const { + S.Emit(Loc); + S.Emit(getType()); + S.EmitPtr(getDecl()); +} + +ObjCIvarRefExpr* ObjCIvarRefExpr::CreateImpl(Deserializer& D) { + SourceLocation Loc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + ObjCIvarRefExpr* dr = new ObjCIvarRefExpr(NULL,T,Loc); + D.ReadPtr(dr->D,false); + return dr; +} + +void ObjCSelectorExpr::EmitImpl(Serializer& S) const { + S.Emit(AtLoc); + S.Emit(RParenLoc); + S.Emit(getType()); + S.Emit(SelName); +} + +ObjCSelectorExpr* ObjCSelectorExpr::CreateImpl(Deserializer& D) { + SourceLocation AtLoc = SourceLocation::ReadVal(D); + SourceLocation RParenLoc = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + Selector SelName = Selector::ReadVal(D); + + return new ObjCSelectorExpr(T,SelName,AtLoc,RParenLoc); +} + +void ObjCStringLiteral::EmitImpl(Serializer& S) const { + S.Emit(AtLoc); + S.Emit(getType()); + S.EmitOwnedPtr(String); +} + +ObjCStringLiteral* ObjCStringLiteral::CreateImpl(Deserializer& D) { + SourceLocation L = SourceLocation::ReadVal(D); + QualType T = QualType::ReadVal(D); + StringLiteral* String = cast<StringLiteral>(D.ReadOwnedPtr<Stmt>()); + return new ObjCStringLiteral(String,T,L); +} diff --git a/clang/lib/AST/StmtViz.cpp b/clang/lib/AST/StmtViz.cpp new file mode 100644 index 00000000000..51d514b20aa --- /dev/null +++ b/clang/lib/AST/StmtViz.cpp @@ -0,0 +1,59 @@ +//===--- StmtViz.cpp - Graphviz 
visualization for Stmt ASTs -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Stmt::viewAST, which generates a Graphviz DOT file +// that depicts the AST and then calls Graphviz/dot+gv on it. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtGraphTraits.h" +#include "llvm/Support/GraphWriter.h" +#include <sstream> + +using namespace clang; + +void Stmt::viewAST() const { +#ifndef NDEBUG + llvm::ViewGraph(this,"AST"); +#else + llvm::cerr << "Stmt::viewAST is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif +} + +namespace llvm { +template<> +struct DOTGraphTraits<const Stmt*> : public DefaultDOTGraphTraits { + static std::string getNodeLabel(const Stmt* Node, const Stmt* Graph) { + +#ifndef NDEBUG + std::ostringstream Out; + + if (Node) + Out << Node->getStmtClassName(); + else + Out << "<NULL>"; + + std::string OutStr = Out.str(); + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + + return OutStr; +#else + return ""; +#endif + } +}; +} // end namespace llvm diff --git a/clang/lib/AST/TranslationUnit.cpp b/clang/lib/AST/TranslationUnit.cpp new file mode 100644 index 00000000000..b91448b2d3f --- /dev/null +++ b/clang/lib/AST/TranslationUnit.cpp @@ -0,0 +1,225 @@ +//===--- TranslationUnit.cpp - Abstraction for Translation Units ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// FIXME: This should eventually be moved out of the driver, or replaced +// with its eventual successor. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/TranslationUnit.h" + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/SourceManager.h" +#include "clang/AST/AST.h" + +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/System/Path.h" +#include "llvm/ADT/OwningPtr.h" + +#include <stdio.h> + +using namespace clang; + +enum { BasicMetadataBlock = 1, + ASTContextBlock = 2, + DeclsBlock = 3 }; + + +bool clang::EmitASTBitcodeFile(const TranslationUnit& TU, + const llvm::sys::Path& Filename) { + + // Reserve 256K for bitstream buffer. + std::vector<unsigned char> Buffer; + Buffer.reserve(256*1024); + + // Create bitstream. + llvm::BitstreamWriter Stream(Buffer); + + // Emit the preamble. + Stream.Emit((unsigned)'B', 8); + Stream.Emit((unsigned)'C', 8); + Stream.Emit(0xC, 4); + Stream.Emit(0xF, 4); + Stream.Emit(0xE, 4); + Stream.Emit(0x0, 4); + + { + // Create serializer. Placing it in its own scope assures any necessary + // finalization of bits to the buffer in the serializer's dstor. + llvm::Serializer Sezr(Stream); + + // Emit the translation unit. + TU.Emit(Sezr); + } + + // Write the bits to disk. + if (FILE* fp = fopen(Filename.c_str(),"wb")) { + fwrite((char*)&Buffer.front(), sizeof(char), Buffer.size(), fp); + fclose(fp); + return true; + } + + return false; +} + +void TranslationUnit::Emit(llvm::Serializer& Sezr) const { + + // ===---------------------------------------------------===/ + // Serialize the top-level decls. + // ===---------------------------------------------------===/ + + Sezr.EnterBlock(DeclsBlock); + + // Only serialize the head of a decl chain. 
The ASTConsumer interfaces + // provides us with each top-level decl, including those nested in + // a decl chain, so we may be passed decls that are already serialized. + for (const_iterator I=begin(), E=end(); I!=E; ++I) + if (!Sezr.isRegistered(*I)) + Sezr.EmitOwnedPtr(*I); + + Sezr.ExitBlock(); + + // ===---------------------------------------------------===/ + // Serialize the "Translation Unit" metadata. + // ===---------------------------------------------------===/ + + // Emit ASTContext. + Sezr.EnterBlock(ASTContextBlock); + Sezr.EmitOwnedPtr(Context); + Sezr.ExitBlock(); + + Sezr.EnterBlock(BasicMetadataBlock); + + // Block for SourceManager, LangOptions, and Target. Allows easy skipping + // around to the block for the Selectors during deserialization. + Sezr.EnterBlock(); + + // Emit the SourceManager. + Sezr.Emit(Context->getSourceManager()); + + // Emit the LangOptions. + Sezr.Emit(LangOpts); + + // Emit the Target. + Sezr.EmitPtr(&Context->Target); + Sezr.EmitCStr(Context->Target.getTargetTriple()); + + Sezr.ExitBlock(); // exit "BasicMetadataBlock" + + // Emit the Selectors. + Sezr.Emit(Context->Selectors); + + // Emit the Identifier Table. + Sezr.Emit(Context->Idents); + + Sezr.ExitBlock(); // exit "ASTContextBlock" +} + +TranslationUnit* +clang::ReadASTBitcodeFile(const llvm::sys::Path& Filename, FileManager& FMgr) { + + // Create the memory buffer that contains the contents of the file. + llvm::OwningPtr<llvm::MemoryBuffer> + MBuffer(llvm::MemoryBuffer::getFile(Filename.c_str(), + strlen(Filename.c_str()))); + + if (!MBuffer) { + // FIXME: Provide diagnostic. + return NULL; + } + + // Check if the file is of the proper length. + if (MBuffer->getBufferSize() & 0x3) { + // FIXME: Provide diagnostic: "Length should be a multiple of 4 bytes." + return NULL; + } + + // Create the bitstream reader. 
+ unsigned char *BufPtr = (unsigned char *) MBuffer->getBufferStart(); + llvm::BitstreamReader Stream(BufPtr,BufPtr+MBuffer->getBufferSize()); + + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xF || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0x0) { + // FIXME: Provide diagnostic. + return NULL; + } + + // Create the deserializer. + llvm::Deserializer Dezr(Stream); + + return TranslationUnit::Create(Dezr,FMgr); +} + +TranslationUnit* TranslationUnit::Create(llvm::Deserializer& Dezr, + FileManager& FMgr) { + + // Create the translation unit object. + TranslationUnit* TU = new TranslationUnit(); + + // ===---------------------------------------------------===/ + // Deserialize the "Translation Unit" metadata. + // ===---------------------------------------------------===/ + + // Skip to the BasicMetaDataBlock. First jump to ASTContextBlock + // (which will appear earlier) and record its location. + + bool FoundBlock = Dezr.SkipToBlock(ASTContextBlock); + assert (FoundBlock); + + llvm::Deserializer::Location ASTContextBlockLoc = + Dezr.getCurrentBlockLocation(); + + FoundBlock = Dezr.SkipToBlock(BasicMetadataBlock); + assert (FoundBlock); + + // Read the SourceManager. + SourceManager::CreateAndRegister(Dezr,FMgr); + + // Read the LangOptions. + TU->LangOpts.Read(Dezr); + + { // Read the TargetInfo. + llvm::SerializedPtrID PtrID = Dezr.ReadPtrID(); + char* triple = Dezr.ReadCStr(NULL,0,true); + Dezr.RegisterPtr(PtrID,TargetInfo::CreateTargetInfo(std::string(triple))); + delete [] triple; + } + + // For Selectors, we must read the identifier table first because the + // SelectorTable depends on the identifiers being already deserialized. + llvm::Deserializer::Location SelectorBlkLoc = Dezr.getCurrentBlockLocation(); + Dezr.SkipBlock(); + + // Read the identifier table. + IdentifierTable::CreateAndRegister(Dezr); + + // Now jump back and read the selectors. 
+ Dezr.JumpTo(SelectorBlkLoc); + SelectorTable::CreateAndRegister(Dezr); + + // Now jump back to ASTContextBlock and read the ASTContext. + Dezr.JumpTo(ASTContextBlockLoc); + TU->Context = Dezr.ReadOwnedPtr<ASTContext>(); + + // "Rewind" the stream. Find the block with the serialized top-level decls. + Dezr.Rewind(); + FoundBlock = Dezr.SkipToBlock(DeclsBlock); + assert (FoundBlock); + llvm::Deserializer::Location DeclBlockLoc = Dezr.getCurrentBlockLocation(); + + while (!Dezr.FinishedBlock(DeclBlockLoc)) + TU->AddTopLevelDecl(Dezr.ReadOwnedPtr<Decl>()); + + return TU; +} + diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp new file mode 100644 index 00000000000..80b2d5cb93d --- /dev/null +++ b/clang/lib/AST/Type.cpp @@ -0,0 +1,978 @@ +//===--- Type.cpp - Type representation and manipulation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements type-related functionality. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Type.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/Support/Streams.h" +#include "llvm/ADT/StringExtras.h" +#include <sstream> + +using namespace clang; + +Type::~Type() {} + +/// isVoidType - Helper method to determine if this is the 'void' type. 
+bool Type::isVoidType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Void; + return false; +} + +bool Type::isObjectType() const { + if (isa<FunctionType>(CanonicalType)) + return false; + else if (CanonicalType->isIncompleteType()) + return false; + else + return true; +} + +bool Type::isDerivedType() const { + switch (CanonicalType->getTypeClass()) { + case Pointer: + case VariableArray: + case ConstantArray: + case IncompleteArray: + case FunctionProto: + case FunctionNoProto: + case Reference: + return true; + case Tagged: { + const TagType *TT = cast<TagType>(CanonicalType); + const Decl::Kind Kind = TT->getDecl()->getKind(); + return Kind == Decl::Struct || Kind == Decl::Union; + } + default: + return false; + } +} + +bool Type::isStructureType() const { + if (const RecordType *RT = dyn_cast<RecordType>(CanonicalType)) + if (RT->getDecl()->getKind() == Decl::Struct) + return true; + return false; +} +bool Type::isUnionType() const { + if (const RecordType *RT = dyn_cast<RecordType>(CanonicalType)) + if (RT->getDecl()->getKind() == Decl::Union) + return true; + return false; +} + +bool Type::isComplexType() const { + if (const ComplexType *CT = dyn_cast<ComplexType>(CanonicalType)) + return CT->getElementType()->isFloatingType(); + return false; +} + +bool Type::isComplexIntegerType() const { + // Check for GCC complex integer extension. + if (const ComplexType *CT = dyn_cast<ComplexType>(CanonicalType)) + return CT->getElementType()->isIntegerType(); + return false; +} + +const ComplexType *Type::getAsComplexIntegerType() const { + // Are we directly a complex type? + if (const ComplexType *CTy = dyn_cast<ComplexType>(this)) { + if (CTy->getElementType()->isIntegerType()) + return CTy; + } + // If the canonical form of this type isn't the right kind, reject it. 
+ const ComplexType *CTy = dyn_cast<ComplexType>(CanonicalType); + if (!CTy || !CTy->getElementType()->isIntegerType()) + return 0; + + // If this is a typedef for a complex type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsComplexIntegerType(); +} + +/// getDesugaredType - Return the specified type with any "sugar" removed from +/// type type. This takes off typedefs, typeof's etc. If the outer level of +/// the type is already concrete, it returns it unmodified. This is similar +/// to getting the canonical type, but it doesn't remove *all* typedefs. For +/// example, it return "T*" as "T*", (not as "int*"), because the pointer is +/// concrete. +const Type *Type::getDesugaredType() const { + if (const TypedefType *TDT = dyn_cast<TypedefType>(this)) + return TDT->LookThroughTypedefs().getTypePtr(); + if (const TypeOfExpr *TOE = dyn_cast<TypeOfExpr>(this)) + return TOE->getUnderlyingExpr()->getType().getTypePtr(); + if (const TypeOfType *TOT = dyn_cast<TypeOfType>(this)) + return TOT->getUnderlyingType().getTypePtr(); + return this; +} + + +const BuiltinType *Type::getAsBuiltinType() const { + // If this is directly a builtin type, return it. + if (const BuiltinType *BTy = dyn_cast<BuiltinType>(this)) + return BTy; + + // If the canonical form of this type isn't a builtin type, reject it. + if (!isa<BuiltinType>(CanonicalType)) { + // Look through type qualifiers + if (isa<BuiltinType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsBuiltinType(); + return 0; + } + + // If this is a typedef for a builtin type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsBuiltinType(); +} + +const FunctionType *Type::getAsFunctionType() const { + // If this is directly a function type, return it. 
+ if (const FunctionType *FTy = dyn_cast<FunctionType>(this)) + return FTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<FunctionType>(CanonicalType)) { + // Look through type qualifiers + if (isa<FunctionType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsFunctionType(); + return 0; + } + + // If this is a typedef for a function type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsFunctionType(); +} + +const PointerType *Type::getAsPointerType() const { + // If this is directly a pointer type, return it. + if (const PointerType *PTy = dyn_cast<PointerType>(this)) + return PTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<PointerType>(CanonicalType)) { + // Look through type qualifiers + if (isa<PointerType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsPointerType(); + return 0; + } + + // If this is a typedef for a pointer type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsPointerType(); +} + +const ReferenceType *Type::getAsReferenceType() const { + // If this is directly a reference type, return it. + if (const ReferenceType *RTy = dyn_cast<ReferenceType>(this)) + return RTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<ReferenceType>(CanonicalType)) { + // Look through type qualifiers + if (isa<ReferenceType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsReferenceType(); + return 0; + } + + // If this is a typedef for a reference type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsReferenceType(); +} + +const ArrayType *Type::getAsArrayType() const { + // If this is directly an array type, return it. 
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this)) + return ATy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<ArrayType>(CanonicalType)) { + // Look through type qualifiers + if (isa<ArrayType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsArrayType(); + return 0; + } + + // If this is a typedef for an array type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsArrayType(); +} + +const ConstantArrayType *Type::getAsConstantArrayType() const { + // If this is directly a constant array type, return it. + if (const ConstantArrayType *ATy = dyn_cast<ConstantArrayType>(this)) + return ATy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<ConstantArrayType>(CanonicalType)) { + // Look through type qualifiers + if (isa<ConstantArrayType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsConstantArrayType(); + return 0; + } + + // If this is a typedef for a constant array type, strip the typedef off + // without losing all typedef information. + return getDesugaredType()->getAsConstantArrayType(); +} + +const VariableArrayType *Type::getAsVariableArrayType() const { + // If this is directly a variable array type, return it. + if (const VariableArrayType *ATy = dyn_cast<VariableArrayType>(this)) + return ATy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<VariableArrayType>(CanonicalType)) { + // Look through type qualifiers + if (isa<VariableArrayType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsVariableArrayType(); + return 0; + } + + // If this is a typedef for a variable array type, strip the typedef off + // without losing all typedef information. 
+ return getDesugaredType()->getAsVariableArrayType(); +} + +/// isVariablyModifiedType (C99 6.7.5p3) - Return true for variable length +/// array types and types that contain variable array types in their +/// declarator +bool Type::isVariablyModifiedType() const { + // A VLA is a veriably modified type + if (getAsVariableArrayType()) + return true; + + // An array can contain a variably modified type + if (const ArrayType* AT = getAsArrayType()) + return AT->getElementType()->isVariablyModifiedType(); + + // A pointer can point to a variably modified type + if (const PointerType* PT = getAsPointerType()) + return PT->getPointeeType()->isVariablyModifiedType(); + + // A function can return a variably modified type + // This one isn't completely obvious, but it follows from the + // definition in C99 6.7.5p3. Because of this rule, it's + // illegal to declare a function returning a variably modified type. + if (const FunctionType* FT = getAsFunctionType()) + return FT->getResultType()->isVariablyModifiedType(); + + return false; +} + +bool Type::isIncompleteArrayType() const { + return isa<IncompleteArrayType>(CanonicalType); +} + +const IncompleteArrayType *Type::getAsIncompleteArrayType() const { + // If this is directly a variable array type, return it. + if (const IncompleteArrayType *ATy = dyn_cast<IncompleteArrayType>(this)) + return ATy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<IncompleteArrayType>(CanonicalType)) { + // Look through type qualifiers + if (isa<IncompleteArrayType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsIncompleteArrayType(); + return 0; + } + + // If this is a typedef for a variable array type, strip the typedef off + // without losing all typedef information. + return getDesugaredType()->getAsIncompleteArrayType(); +} + +const RecordType *Type::getAsRecordType() const { + // If this is directly a reference type, return it. 
+ if (const RecordType *RTy = dyn_cast<RecordType>(this)) + return RTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<RecordType>(CanonicalType)) { + // Look through type qualifiers + if (isa<RecordType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsRecordType(); + return 0; + } + + // If this is a typedef for a record type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsRecordType(); +} + +const RecordType *Type::getAsStructureType() const { + // If this is directly a structure type, return it. + if (const RecordType *RT = dyn_cast<RecordType>(this)) { + if (RT->getDecl()->getKind() == Decl::Struct) + return RT; + } + + // If the canonical form of this type isn't the right kind, reject it. + if (const RecordType *RT = dyn_cast<RecordType>(CanonicalType)) { + if (RT->getDecl()->getKind() != Decl::Struct) + return 0; + + // If this is a typedef for a structure type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsStructureType(); + } + // Look through type qualifiers + if (isa<RecordType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsStructureType(); + return 0; +} + +const RecordType *Type::getAsUnionType() const { + // If this is directly a union type, return it. + if (const RecordType *RT = dyn_cast<RecordType>(this)) { + if (RT->getDecl()->getKind() == Decl::Union) + return RT; + } + + // If the canonical form of this type isn't the right kind, reject it. + if (const RecordType *RT = dyn_cast<RecordType>(CanonicalType)) { + if (RT->getDecl()->getKind() != Decl::Union) + return 0; + + // If this is a typedef for a union type, strip the typedef off without + // losing all typedef information. 
+ return getDesugaredType()->getAsUnionType(); + } + + // Look through type qualifiers + if (isa<RecordType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsUnionType(); + return 0; +} + +const ComplexType *Type::getAsComplexType() const { + // Are we directly a complex type? + if (const ComplexType *CTy = dyn_cast<ComplexType>(this)) + return CTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<ComplexType>(CanonicalType)) { + // Look through type qualifiers + if (isa<ComplexType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsComplexType(); + return 0; + } + + // If this is a typedef for a complex type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsComplexType(); +} + +const VectorType *Type::getAsVectorType() const { + // Are we directly a vector type? + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy; + + // If the canonical form of this type isn't the right kind, reject it. + if (!isa<VectorType>(CanonicalType)) { + // Look through type qualifiers + if (isa<VectorType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsVectorType(); + return 0; + } + + // If this is a typedef for a vector type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsVectorType(); +} + +const OCUVectorType *Type::getAsOCUVectorType() const { + // Are we directly an OpenCU vector type? + if (const OCUVectorType *VTy = dyn_cast<OCUVectorType>(this)) + return VTy; + + // If the canonical form of this type isn't the right kind, reject it. 
+ if (!isa<OCUVectorType>(CanonicalType)) { + // Look through type qualifiers + if (isa<OCUVectorType>(CanonicalType.getUnqualifiedType())) + return CanonicalType.getUnqualifiedType()->getAsOCUVectorType(); + return 0; + } + + // If this is a typedef for an ocuvector type, strip the typedef off without + // losing all typedef information. + return getDesugaredType()->getAsOCUVectorType(); +} + +bool Type::isIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::LongLong; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + if (const VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isIntegerType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isIntegerType(); + return false; +} + +bool Type::isIntegralType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::LongLong; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isIntegralType(); + return false; +} + +bool Type::isEnumeralType() const { + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + return TT->getDecl()->getKind() == Decl::Enum; + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isEnumeralType(); + return false; +} + +bool Type::isBooleanType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Bool; + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isBooleanType(); + return false; +} + +bool 
Type::isCharType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Char_U || + BT->getKind() == BuiltinType::UChar || + BT->getKind() == BuiltinType::Char_S || + BT->getKind() == BuiltinType::SChar; + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isCharType(); + return false; +} + +/// isSignedIntegerType - Return true if this is an integer type that is +/// signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], +/// an enum decl which has a signed representation, or a vector of signed +/// integer element type. +bool Type::isSignedIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) { + return BT->getKind() >= BuiltinType::Char_S && + BT->getKind() <= BuiltinType::LongLong; + } + + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (const EnumDecl *ED = dyn_cast<EnumDecl>(TT->getDecl())) + return ED->getIntegerType()->isSignedIntegerType(); + + if (const VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isSignedIntegerType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isSignedIntegerType(); + return false; +} + +/// isUnsignedIntegerType - Return true if this is an integer type that is +/// unsigned, according to C99 6.2.5p6 [which returns true for _Bool], an enum +/// decl which has an unsigned representation, or a vector of unsigned integer +/// element type. 
+bool Type::isUnsignedIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) { + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::ULongLong; + } + + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (const EnumDecl *ED = dyn_cast<EnumDecl>(TT->getDecl())) + return ED->getIntegerType()->isUnsignedIntegerType(); + + if (const VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isUnsignedIntegerType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isUnsignedIntegerType(); + return false; +} + +bool Type::isFloatingType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Float && + BT->getKind() <= BuiltinType::LongDouble; + if (const ComplexType *CT = dyn_cast<ComplexType>(CanonicalType)) + return CT->getElementType()->isFloatingType(); + if (const VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isFloatingType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isFloatingType(); + return false; +} + +bool Type::isRealFloatingType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Float && + BT->getKind() <= BuiltinType::LongDouble; + if (const VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isRealFloatingType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isRealFloatingType(); + return false; +} + +bool Type::isRealType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::LongDouble; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + return TT->getDecl()->getKind() == Decl::Enum; + if (const 
VectorType *VT = dyn_cast<VectorType>(CanonicalType)) + return VT->getElementType()->isRealType(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isRealType(); + return false; +} + +bool Type::isArithmeticType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() != BuiltinType::Void; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (const EnumDecl *ED = dyn_cast<EnumDecl>(TT->getDecl())) + // GCC allows forward declaration of enum types (forbid by C99 6.7.2.3p2). + // If a body isn't seen by the time we get here, return false. + return ED->isDefinition(); + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isArithmeticType(); + return isa<ComplexType>(CanonicalType) || isa<VectorType>(CanonicalType); +} + +bool Type::isScalarType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() != BuiltinType::Void; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + return false; + } + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isScalarType(); + return isa<PointerType>(CanonicalType) || isa<ComplexType>(CanonicalType) || + isa<ObjCQualifiedIdType>(CanonicalType); +} + +bool Type::isAggregateType() const { + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Struct) + return true; + return false; + } + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isAggregateType(); + return isa<ArrayType>(CanonicalType); +} + +/// isConstantSizeType - Return true if this is not a variable sized type, +/// according to the rules of C99 6.7.5p3. It is not legal to call this on +/// incomplete types. 
+bool Type::isConstantSizeType() const { + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isConstantSizeType(); + assert(!isIncompleteType() && "This doesn't make sense for incomplete types"); + // The VAT must have a size, as it is known to be complete. + return !isa<VariableArrayType>(CanonicalType); +} + +/// isIncompleteType - Return true if this is an incomplete type (C99 6.2.5p1) +/// - a type that can describe objects, but which lacks information needed to +/// determine its size. +bool Type::isIncompleteType() const { + switch (CanonicalType->getTypeClass()) { + default: return false; + case ASQual: + return cast<ASQualType>(CanonicalType)->getBaseType()->isIncompleteType(); + case Builtin: + // Void is the only incomplete builtin type. Per C99 6.2.5p19, it can never + // be completed. + return isVoidType(); + case Tagged: + // A tagged type (struct/union/enum/class) is incomplete if the decl is a + // forward declaration, but not a full definition (C99 6.2.5p22). + return !cast<TagType>(CanonicalType)->getDecl()->isDefinition(); + case IncompleteArray: + // An array of unknown size is an incomplete type (C99 6.2.5p22). 
+ return true; + } +} + +bool Type::isPromotableIntegerType() const { + if (const ASQualType *ASQT = dyn_cast<ASQualType>(CanonicalType)) + return ASQT->getBaseType()->isPromotableIntegerType(); + const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType); + if (!BT) return false; + switch (BT->getKind()) { + case BuiltinType::Bool: + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Short: + case BuiltinType::UShort: + return true; + default: + return false; + } +} + +const char *BuiltinType::getName() const { + switch (getKind()) { + default: assert(0 && "Unknown builtin type!"); + case Void: return "void"; + case Bool: return "_Bool"; + case Char_S: return "char"; + case Char_U: return "char"; + case SChar: return "signed char"; + case Short: return "short"; + case Int: return "int"; + case Long: return "long"; + case LongLong: return "long long"; + case UChar: return "unsigned char"; + case UShort: return "unsigned short"; + case UInt: return "unsigned int"; + case ULong: return "unsigned long"; + case ULongLong: return "unsigned long long"; + case Float: return "float"; + case Double: return "double"; + case LongDouble: return "long double"; + } +} + +void FunctionTypeProto::Profile(llvm::FoldingSetNodeID &ID, QualType Result, + arg_type_iterator ArgTys, + unsigned NumArgs, bool isVariadic) { + ID.AddPointer(Result.getAsOpaquePtr()); + for (unsigned i = 0; i != NumArgs; ++i) + ID.AddPointer(ArgTys[i].getAsOpaquePtr()); + ID.AddInteger(isVariadic); +} + +void FunctionTypeProto::Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getResultType(), arg_type_begin(), NumArgs, isVariadic()); +} + +void ObjCQualifiedInterfaceType::Profile(llvm::FoldingSetNodeID &ID, + ObjCProtocolDecl **protocols, + unsigned NumProtocols) { + for (unsigned i = 0; i != NumProtocols; i++) + ID.AddPointer(protocols[i]); +} + +void ObjCQualifiedInterfaceType::Profile(llvm::FoldingSetNodeID &ID) { + 
Profile(ID, &Protocols[0], getNumProtocols()); +} + +void ObjCQualifiedIdType::Profile(llvm::FoldingSetNodeID &ID, + ObjCProtocolDecl **protocols, + unsigned NumProtocols) { + for (unsigned i = 0; i != NumProtocols; i++) + ID.AddPointer(protocols[i]); +} + +void ObjCQualifiedIdType::Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, &Protocols[0], getNumProtocols()); +} + +/// LookThroughTypedefs - Return the ultimate type this typedef corresponds to +/// potentially looking through *all* consequtive typedefs. This returns the +/// sum of the type qualifiers, so if you have: +/// typedef const int A; +/// typedef volatile A B; +/// looking through the typedefs for B will give you "const volatile A". +/// +QualType TypedefType::LookThroughTypedefs() const { + // Usually, there is only a single level of typedefs, be fast in that case. + QualType FirstType = getDecl()->getUnderlyingType(); + if (!isa<TypedefType>(FirstType)) + return FirstType; + + // Otherwise, do the fully general loop. + unsigned TypeQuals = 0; + const TypedefType *TDT = this; + while (1) { + QualType CurType = TDT->getDecl()->getUnderlyingType(); + + + /// FIXME: + /// FIXME: This is incorrect for ASQuals! 
+ /// FIXME: + TypeQuals |= CurType.getCVRQualifiers(); + + TDT = dyn_cast<TypedefType>(CurType); + if (TDT == 0) + return QualType(CurType.getTypePtr(), TypeQuals); + } +} + +bool RecordType::classof(const Type *T) { + if (const TagType *TT = dyn_cast<TagType>(T)) + return isa<RecordDecl>(TT->getDecl()); + return false; +} + + +//===----------------------------------------------------------------------===// +// Type Printing +//===----------------------------------------------------------------------===// + +void QualType::dump(const char *msg) const { + std::string R = "identifier"; + getAsStringInternal(R); + if (msg) + fprintf(stderr, "%s: %s\n", msg, R.c_str()); + else + fprintf(stderr, "%s\n", R.c_str()); +} + +static void AppendTypeQualList(std::string &S, unsigned TypeQuals) { + // Note: funkiness to ensure we get a space only between quals. + bool NonePrinted = true; + if (TypeQuals & QualType::Const) + S += "const", NonePrinted = false; + if (TypeQuals & QualType::Volatile) + S += (NonePrinted+" volatile"), NonePrinted = false; + if (TypeQuals & QualType::Restrict) + S += (NonePrinted+" restrict"), NonePrinted = false; +} + +void QualType::getAsStringInternal(std::string &S) const { + if (isNull()) { + S += "NULL TYPE\n"; + return; + } + + // Print qualifiers as appropriate. + if (unsigned Tq = getCVRQualifiers()) { + std::string TQS; + AppendTypeQualList(TQS, Tq); + if (!S.empty()) + S = TQS + ' ' + S; + else + S = TQS; + } + + getTypePtr()->getAsStringInternal(S); +} + +void BuiltinType::getAsStringInternal(std::string &S) const { + if (S.empty()) { + S = getName(); + } else { + // Prefix the basic type, e.g. 'int X'. 
+ S = ' ' + S; + S = getName() + S; + } +} + +void ComplexType::getAsStringInternal(std::string &S) const { + ElementType->getAsStringInternal(S); + S = "_Complex " + S; +} + +void ASQualType::getAsStringInternal(std::string &S) const { + S = "__attribute__((address_space("+llvm::utostr_32(AddressSpace)+")))" + S; + BaseType->getAsStringInternal(S); +} + +void PointerType::getAsStringInternal(std::string &S) const { + S = '*' + S; + + // Handle things like 'int (*A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa<ArrayType>(PointeeType.getTypePtr())) + S = '(' + S + ')'; + + PointeeType.getAsStringInternal(S); +} + +void ReferenceType::getAsStringInternal(std::string &S) const { + S = '&' + S; + + // Handle things like 'int (&A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa<ArrayType>(ReferenceeType.getTypePtr())) + S = '(' + S + ')'; + + ReferenceeType.getAsStringInternal(S); +} + +void ConstantArrayType::getAsStringInternal(std::string &S) const { + S += '['; + S += llvm::utostr(getSize().getZExtValue()); + S += ']'; + + getElementType().getAsStringInternal(S); +} + +void IncompleteArrayType::getAsStringInternal(std::string &S) const { + S += "[]"; + + getElementType().getAsStringInternal(S); +} + +void VariableArrayType::getAsStringInternal(std::string &S) const { + S += '['; + + if (getIndexTypeQualifier()) { + AppendTypeQualList(S, getIndexTypeQualifier()); + S += ' '; + } + + if (getSizeModifier() == Static) + S += "static"; + else if (getSizeModifier() == Star) + S += '*'; + + if (getSizeExpr()) { + std::ostringstream s; + getSizeExpr()->printPretty(s); + S += s.str(); + } + S += ']'; + + getElementType().getAsStringInternal(S); +} + +void VectorType::getAsStringInternal(std::string &S) const { + S += " __attribute__((__vector_size__("; + // FIXME: should multiply by element size somehow. 
+ S += llvm::utostr_32(NumElements*4); // convert back to bytes. + S += ")))"; + ElementType.getAsStringInternal(S); +} + +void OCUVectorType::getAsStringInternal(std::string &S) const { + S += " __attribute__((ocu_vector_type("; + S += llvm::utostr_32(NumElements); + S += ")))"; + ElementType.getAsStringInternal(S); +} + +void TypeOfExpr::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typeof(e) X'. + InnerString = ' ' + InnerString; + std::ostringstream s; + getUnderlyingExpr()->printPretty(s); + InnerString = "typeof(" + s.str() + ")" + InnerString; +} + +void TypeOfType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typeof(t) X'. + InnerString = ' ' + InnerString; + std::string Tmp; + getUnderlyingType().getAsStringInternal(Tmp); + InnerString = "typeof(" + Tmp + ")" + InnerString; +} + +void FunctionTypeNoProto::getAsStringInternal(std::string &S) const { + // If needed for precedence reasons, wrap the inner part in grouping parens. + if (!S.empty()) + S = "(" + S + ")"; + + S += "()"; + getResultType().getAsStringInternal(S); +} + +void FunctionTypeProto::getAsStringInternal(std::string &S) const { + // If needed for precedence reasons, wrap the inner part in grouping parens. + if (!S.empty()) + S = "(" + S + ")"; + + S += "("; + std::string Tmp; + for (unsigned i = 0, e = getNumArgs(); i != e; ++i) { + if (i) S += ", "; + getArgType(i).getAsStringInternal(Tmp); + S += Tmp; + Tmp.clear(); + } + + if (isVariadic()) { + if (getNumArgs()) + S += ", "; + S += "..."; + } else if (getNumArgs() == 0) { + // Do not emit int() if we have a proto, emit 'int(void)'. + S += "void"; + } + + S += ")"; + getResultType().getAsStringInternal(S); +} + + +void TypedefType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. 
+ InnerString = ' ' + InnerString; + InnerString = getDecl()->getIdentifier()->getName() + InnerString; +} + +void ObjCInterfaceType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. + InnerString = ' ' + InnerString; + InnerString = getDecl()->getIdentifier()->getName() + InnerString; +} + +void ObjCQualifiedInterfaceType::getAsStringInternal( + std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. + InnerString = ' ' + InnerString; + std::string ObjCQIString = getDecl()->getName(); + ObjCQIString += '<'; + int num = getNumProtocols(); + for (int i = 0; i < num; i++) { + ObjCQIString += getProtocols(i)->getName(); + if (i < num-1) + ObjCQIString += ','; + } + ObjCQIString += '>'; + InnerString = ObjCQIString + InnerString; +} + +void ObjCQualifiedIdType::getAsStringInternal( + std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. + InnerString = ' ' + InnerString; + std::string ObjCQIString = "id"; + ObjCQIString += '<'; + int num = getNumProtocols(); + for (int i = 0; i < num; i++) { + ObjCQIString += getProtocols(i)->getName(); + if (i < num-1) + ObjCQIString += ','; + } + ObjCQIString += '>'; + InnerString = ObjCQIString + InnerString; +} + +void TagType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. 
+ InnerString = ' ' + InnerString; + + const char *Kind = getDecl()->getKindName(); + const char *ID; + if (const IdentifierInfo *II = getDecl()->getIdentifier()) + ID = II->getName(); + else + ID = "<anonymous>"; + + InnerString = std::string(Kind) + " " + ID + InnerString; +} diff --git a/clang/lib/AST/TypeSerialization.cpp b/clang/lib/AST/TypeSerialization.cpp new file mode 100644 index 00000000000..55c0a48a00d --- /dev/null +++ b/clang/lib/AST/TypeSerialization.cpp @@ -0,0 +1,293 @@ +//===--- TypeSerialization.cpp - Serialization of Decls ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines methods that implement bitcode serialization for Types. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ASTContext.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; +using llvm::Serializer; +using llvm::Deserializer; +using llvm::SerializedPtrID; + + +void QualType::Emit(Serializer& S) const { + S.EmitPtr(getTypePtr()); + S.EmitInt(getCVRQualifiers()); +} + +QualType QualType::ReadVal(Deserializer& D) { + QualType Q; + D.ReadUIntPtr(Q.ThePtr,false); + Q.ThePtr |= D.ReadInt(); + return Q; +} + +void QualType::ReadBackpatch(Deserializer& D) { + D.ReadUIntPtr(ThePtr,true); + ThePtr |= D.ReadInt(); +} + +//===----------------------------------------------------------------------===// +// Type Serialization: Dispatch code to handle specific types. 
+//===----------------------------------------------------------------------===// + +void Type::Emit(Serializer& S) const { + S.EmitInt(getTypeClass()); + S.EmitPtr(this); + + if (!isa<BuiltinType>(this)) + EmitImpl(S); +} + +void Type::EmitImpl(Serializer& S) const { + assert (false && "Serializization for type not supported."); +} + +void Type::Create(ASTContext& Context, unsigned i, Deserializer& D) { + Type::TypeClass K = static_cast<Type::TypeClass>(D.ReadInt()); + SerializedPtrID PtrID = D.ReadPtrID(); + + switch (K) { + default: + assert (false && "Deserialization for type not supported."); + break; + + case Type::Builtin: + assert (i < Context.getTypes().size()); + assert (isa<BuiltinType>(Context.getTypes()[i])); + D.RegisterPtr(PtrID,Context.getTypes()[i]); + break; + + case Type::ASQual: + D.RegisterPtr(PtrID,ASQualType::CreateImpl(Context,D)); + break; + + case Type::Complex: + D.RegisterPtr(PtrID,ComplexType::CreateImpl(Context,D)); + break; + + case Type::ConstantArray: + D.RegisterPtr(PtrID,ConstantArrayType::CreateImpl(Context,D)); + break; + + case Type::FunctionNoProto: + D.RegisterPtr(PtrID,FunctionTypeNoProto::CreateImpl(Context,D)); + break; + + case Type::FunctionProto: + D.RegisterPtr(PtrID,FunctionTypeProto::CreateImpl(Context,D)); + break; + + case Type::IncompleteArray: + D.RegisterPtr(PtrID,IncompleteArrayType::CreateImpl(Context,D)); + break; + + case Type::Pointer: + D.RegisterPtr(PtrID,PointerType::CreateImpl(Context,D)); + break; + + case Type::Tagged: + D.RegisterPtr(PtrID,TagType::CreateImpl(Context,D)); + break; + + case Type::TypeName: + D.RegisterPtr(PtrID,TypedefType::CreateImpl(Context,D)); + break; + + case Type::VariableArray: + D.RegisterPtr(PtrID,VariableArrayType::CreateImpl(Context,D)); + break; + } +} + +//===----------------------------------------------------------------------===// +// ASQualType +//===----------------------------------------------------------------------===// + +void ASQualType::EmitImpl(Serializer& 
S) const { + S.EmitPtr(getBaseType()); + S.EmitInt(getAddressSpace()); +} + +Type* ASQualType::CreateImpl(ASTContext& Context, Deserializer& D) { + QualType BaseTy = QualType::ReadVal(D); + unsigned AddressSpace = D.ReadInt(); + return Context.getASQualType(BaseTy, AddressSpace).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// ComplexType +//===----------------------------------------------------------------------===// + +void ComplexType::EmitImpl(Serializer& S) const { + S.Emit(getElementType()); +} + +Type* ComplexType::CreateImpl(ASTContext& Context, Deserializer& D) { + return Context.getComplexType(QualType::ReadVal(D)).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// ConstantArray +//===----------------------------------------------------------------------===// + +void ConstantArrayType::EmitImpl(Serializer& S) const { + S.Emit(getElementType()); + S.EmitInt(getSizeModifier()); + S.EmitInt(getIndexTypeQualifier()); + S.Emit(Size); +} + +Type* ConstantArrayType::CreateImpl(ASTContext& Context, Deserializer& D) { + QualType ElTy = QualType::ReadVal(D); + ArraySizeModifier am = static_cast<ArraySizeModifier>(D.ReadInt()); + unsigned ITQ = D.ReadInt(); + + llvm::APInt Size; + D.Read(Size); + + return Context.getConstantArrayType(ElTy,Size,am,ITQ).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// FunctionTypeNoProto +//===----------------------------------------------------------------------===// + +void FunctionTypeNoProto::EmitImpl(Serializer& S) const { + S.Emit(getResultType()); +} + +Type* FunctionTypeNoProto::CreateImpl(ASTContext& Context, Deserializer& D) { + return Context.getFunctionTypeNoProto(QualType::ReadVal(D)).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// FunctionTypeProto 
+//===----------------------------------------------------------------------===// + +void FunctionTypeProto::EmitImpl(Serializer& S) const { + S.Emit(getResultType()); + S.EmitBool(isVariadic()); + S.EmitInt(getNumArgs()); + + for (arg_type_iterator I=arg_type_begin(), E=arg_type_end(); I!=E; ++I) + S.Emit(*I); +} + +Type* FunctionTypeProto::CreateImpl(ASTContext& Context, Deserializer& D) { + QualType ResultType = QualType::ReadVal(D); + bool isVariadic = D.ReadBool(); + unsigned NumArgs = D.ReadInt(); + + llvm::SmallVector<QualType,15> Args; + + for (unsigned j = 0; j < NumArgs; ++j) + Args.push_back(QualType::ReadVal(D)); + + return Context.getFunctionType(ResultType,&*Args.begin(), + NumArgs,isVariadic).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// PointerType +//===----------------------------------------------------------------------===// + +void PointerType::EmitImpl(Serializer& S) const { + S.Emit(getPointeeType()); +} + +Type* PointerType::CreateImpl(ASTContext& Context, Deserializer& D) { + return Context.getPointerType(QualType::ReadVal(D)).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// TagType +//===----------------------------------------------------------------------===// + +void TagType::EmitImpl(Serializer& S) const { + S.EmitOwnedPtr(getDecl()); +} + +Type* TagType::CreateImpl(ASTContext& Context, Deserializer& D) { + std::vector<Type*>& Types = + const_cast<std::vector<Type*>&>(Context.getTypes()); + + TagType* T = new TagType(NULL,QualType()); + Types.push_back(T); + + // Deserialize the decl. 
+ T->decl = cast<TagDecl>(D.ReadOwnedPtr<Decl>()); + + return T; +} + +//===----------------------------------------------------------------------===// +// TypedefType +//===----------------------------------------------------------------------===// + +void TypedefType::EmitImpl(Serializer& S) const { + S.Emit(QualType((Type*)this,0).getCanonicalType()); + S.EmitPtr(Decl); +} + +Type* TypedefType::CreateImpl(ASTContext& Context, Deserializer& D) { + std::vector<Type*>& Types = + const_cast<std::vector<Type*>&>(Context.getTypes()); + + TypedefType* T = new TypedefType(Type::TypeName, NULL,QualType::ReadVal(D)); + Types.push_back(T); + + D.ReadPtr(T->Decl); // May be backpatched. + return T; +} + +//===----------------------------------------------------------------------===// +// VariableArrayType +//===----------------------------------------------------------------------===// + +void VariableArrayType::EmitImpl(Serializer& S) const { + S.Emit(getElementType()); + S.EmitInt(getSizeModifier()); + S.EmitInt(getIndexTypeQualifier()); + S.EmitOwnedPtr(SizeExpr); +} + +Type* VariableArrayType::CreateImpl(ASTContext& Context, Deserializer& D) { + QualType ElTy = QualType::ReadVal(D); + ArraySizeModifier am = static_cast<ArraySizeModifier>(D.ReadInt()); + unsigned ITQ = D.ReadInt(); + Expr* SizeExpr = D.ReadOwnedPtr<Expr>(); + + return Context.getVariableArrayType(ElTy,SizeExpr,am,ITQ).getTypePtr(); +} + +//===----------------------------------------------------------------------===// +// IncompleteArrayType +//===----------------------------------------------------------------------===// + +void IncompleteArrayType::EmitImpl(Serializer& S) const { + S.Emit(getElementType()); + S.EmitInt(getSizeModifier()); + S.EmitInt(getIndexTypeQualifier()); +} + +Type* IncompleteArrayType::CreateImpl(ASTContext& Context, Deserializer& D) { + QualType ElTy = QualType::ReadVal(D); + ArraySizeModifier am = static_cast<ArraySizeModifier>(D.ReadInt()); + unsigned ITQ = D.ReadInt(); + + 
return Context.getIncompleteArrayType(ElTy,am,ITQ).getTypePtr(); +} diff --git a/clang/lib/Analysis/BasicValueFactory.cpp b/clang/lib/Analysis/BasicValueFactory.cpp new file mode 100644 index 00000000000..88b360d1d0e --- /dev/null +++ b/clang/lib/Analysis/BasicValueFactory.cpp @@ -0,0 +1,167 @@ +//=== BasicValueFactory.cpp - Basic values for Path Sens analysis --*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines BasicValueFactory, a class that manages the lifetime +// of APSInt objects and symbolic constraints used by GRExprEngine +// and related classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/BasicValueFactory.h" + +using namespace clang; + +BasicValueFactory::~BasicValueFactory() { + // Note that the dstor for the contents of APSIntSet will never be called, + // so we iterate over the set and invoke the dstor for each APSInt. This + // frees an aux. memory allocated to represent very large constants. 
+ for (APSIntSetTy::iterator I=APSIntSet.begin(), E=APSIntSet.end(); I!=E; ++I) + I->getValue().~APSInt(); +} + +const llvm::APSInt& BasicValueFactory::getValue(const llvm::APSInt& X) { + llvm::FoldingSetNodeID ID; + void* InsertPos; + typedef llvm::FoldingSetNodeWrapper<llvm::APSInt> FoldNodeTy; + + X.Profile(ID); + FoldNodeTy* P = APSIntSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!P) { + P = (FoldNodeTy*) BPAlloc.Allocate<FoldNodeTy>(); + new (P) FoldNodeTy(X); + APSIntSet.InsertNode(P, InsertPos); + } + + return *P; +} + +const llvm::APSInt& BasicValueFactory::getValue(uint64_t X, unsigned BitWidth, + bool isUnsigned) { + llvm::APSInt V(BitWidth, isUnsigned); + V = X; + return getValue(V); +} + +const llvm::APSInt& BasicValueFactory::getValue(uint64_t X, QualType T) { + + unsigned bits = Ctx.getTypeSize(T); + llvm::APSInt V(bits, T->isUnsignedIntegerType()); + V = X; + return getValue(V); +} + +const SymIntConstraint& +BasicValueFactory::getConstraint(SymbolID sym, BinaryOperator::Opcode Op, + const llvm::APSInt& V) { + + llvm::FoldingSetNodeID ID; + SymIntConstraint::Profile(ID, sym, Op, V); + void* InsertPos; + + SymIntConstraint* C = SymIntCSet.FindNodeOrInsertPos(ID, InsertPos); + + if (!C) { + C = (SymIntConstraint*) BPAlloc.Allocate<SymIntConstraint>(); + new (C) SymIntConstraint(sym, Op, V); + SymIntCSet.InsertNode(C, InsertPos); + } + + return *C; +} + +const llvm::APSInt* +BasicValueFactory::EvaluateAPSInt(BinaryOperator::Opcode Op, + const llvm::APSInt& V1, const llvm::APSInt& V2) { + + switch (Op) { + default: + assert (false && "Invalid Opcode."); + + case BinaryOperator::Mul: + return &getValue( V1 * V2 ); + + case BinaryOperator::Div: + return &getValue( V1 / V2 ); + + case BinaryOperator::Rem: + return &getValue( V1 % V2 ); + + case BinaryOperator::Add: + return &getValue( V1 + V2 ); + + case BinaryOperator::Sub: + return &getValue( V1 - V2 ); + + case BinaryOperator::Shl: { + + // FIXME: This logic should probably go higher up, where we can + 
// test these conditions symbolically. + + // FIXME: Expand these checks to include all undefined behavior. + + if (V2.isSigned() && V2.isNegative()) + return NULL; + + uint64_t Amt = V2.getZExtValue(); + + if (Amt > V1.getBitWidth()) + return NULL; + + return &getValue( V1.operator<<( (unsigned) Amt )); + } + + case BinaryOperator::Shr: { + + // FIXME: This logic should probably go higher up, where we can + // test these conditions symbolically. + + // FIXME: Expand these checks to include all undefined behavior. + + if (V2.isSigned() && V2.isNegative()) + return NULL; + + uint64_t Amt = V2.getZExtValue(); + + if (Amt > V1.getBitWidth()) + return NULL; + + return &getValue( V1.operator>>( (unsigned) Amt )); + } + + case BinaryOperator::LT: + return &getTruthValue( V1 < V2 ); + + case BinaryOperator::GT: + return &getTruthValue( V1 > V2 ); + + case BinaryOperator::LE: + return &getTruthValue( V1 <= V2 ); + + case BinaryOperator::GE: + return &getTruthValue( V1 >= V2 ); + + case BinaryOperator::EQ: + return &getTruthValue( V1 == V2 ); + + case BinaryOperator::NE: + return &getTruthValue( V1 != V2 ); + + // Note: LAnd, LOr, Comma are handled specially by higher-level logic. + + case BinaryOperator::And: + return &getValue( V1 & V2 ); + + case BinaryOperator::Or: + return &getValue( V1 | V2 ); + + case BinaryOperator::Xor: + return &getValue( V1 ^ V2 ); + } +} diff --git a/clang/lib/Analysis/CFRefCount.cpp b/clang/lib/Analysis/CFRefCount.cpp new file mode 100644 index 00000000000..77bbba25ea3 --- /dev/null +++ b/clang/lib/Analysis/CFRefCount.cpp @@ -0,0 +1,796 @@ +// CFRefCount.cpp - Transfer functions for tracking simple values -*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the methods for CFRefCount, which implements +// a reference count checker for Core Foundation (Mac OS X). +// +//===----------------------------------------------------------------------===// + +#include "GRSimpleVals.h" +#include "clang/Analysis/PathSensitive/ValueState.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Analysis/LocalCheckers.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include <ostream> + +using namespace clang; + +namespace { + enum ArgEffect { IncRef, DecRef, DoNothing }; + typedef std::vector<ArgEffect> ArgEffects; +} + +namespace llvm { + template <> struct FoldingSetTrait<ArgEffects> { + static void Profile(const ArgEffects& X, FoldingSetNodeID ID) { + for (ArgEffects::const_iterator I = X.begin(), E = X.end(); I!= E; ++I) + ID.AddInteger((unsigned) *I); + } + + static void Profile(ArgEffects& X, FoldingSetNodeID ID) { + Profile(X, ID); + } + }; +} // end llvm namespace + +namespace { + +class RetEffect { +public: + enum Kind { Alias = 0x0, OwnedSymbol = 0x1, NotOwnedSymbol = 0x2 }; + +private: + unsigned Data; + RetEffect(Kind k, unsigned D) { Data = (Data << 2) | (unsigned) k; } + +public: + + Kind getKind() const { return (Kind) (Data & 0x3); } + + unsigned getValue() const { + assert(getKind() == Alias); + return Data & ~0x3; + } + + static RetEffect MakeAlias(unsigned Idx) { return RetEffect(Alias, Idx); } + + static RetEffect MakeOwned() { return RetEffect(OwnedSymbol, 0); } + + static RetEffect MakeNotOwned() { return RetEffect(NotOwnedSymbol, 0); } + + operator Kind() const { return getKind(); } + + void Profile(llvm::FoldingSetNodeID& ID) const { ID.AddInteger(Data); } +}; + + +class CFRefSummary : public llvm::FoldingSetNode { + ArgEffects* Args; + RetEffect Ret; +public: + + CFRefSummary(ArgEffects* A, RetEffect R) : Args(A), Ret(R) {} + + unsigned 
getNumArgs() const { return Args->size(); } + + ArgEffect getArg(unsigned idx) const { + assert (idx < getNumArgs()); + return (*Args)[idx]; + } + + RetEffect getRet() const { + return Ret; + } + + typedef ArgEffects::const_iterator arg_iterator; + + arg_iterator begin_args() const { return Args->begin(); } + arg_iterator end_args() const { return Args->end(); } + + static void Profile(llvm::FoldingSetNodeID& ID, ArgEffects* A, RetEffect R) { + ID.AddPointer(A); + ID.Add(R); + } + + void Profile(llvm::FoldingSetNodeID& ID) const { + Profile(ID, Args, Ret); + } +}; + + +class CFRefSummaryManager { + typedef llvm::FoldingSet<llvm::FoldingSetNodeWrapper<ArgEffects> > AESetTy; + typedef llvm::FoldingSet<CFRefSummary> SummarySetTy; + typedef llvm::DenseMap<FunctionDecl*, CFRefSummary*> SummaryMapTy; + + SummarySetTy SummarySet; + SummaryMapTy SummaryMap; + AESetTy AESet; + llvm::BumpPtrAllocator BPAlloc; + + ArgEffects ScratchArgs; + + + ArgEffects* getArgEffects(); + + CFRefSummary* getCannedCFSummary(FunctionTypeProto* FT, bool isRetain); + + CFRefSummary* getCFSummary(FunctionDecl* FD, const char* FName); + + CFRefSummary* getCFSummaryCreateRule(FunctionTypeProto* FT); + CFRefSummary* getCFSummaryGetRule(FunctionTypeProto* FT); + + CFRefSummary* getPersistentSummary(ArgEffects* AE, RetEffect RE); + +public: + CFRefSummaryManager() {} + ~CFRefSummaryManager(); + + CFRefSummary* getSummary(FunctionDecl* FD, ASTContext& Ctx); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Implementation of checker data structures. +//===----------------------------------------------------------------------===// + +CFRefSummaryManager::~CFRefSummaryManager() { + + // FIXME: The ArgEffects could eventually be allocated from BPAlloc, + // mitigating the need to do explicit cleanup of the + // Argument-Effect summaries. 
+ + for (AESetTy::iterator I = AESet.begin(), E = AESet.end(); I!=E; ++I) + I->getValue().~ArgEffects(); +} + +ArgEffects* CFRefSummaryManager::getArgEffects() { + + assert (!ScratchArgs.empty()); + + llvm::FoldingSetNodeID profile; + profile.Add(ScratchArgs); + void* InsertPos; + + llvm::FoldingSetNodeWrapper<ArgEffects>* E = + AESet.FindNodeOrInsertPos(profile, InsertPos); + + if (E) { + ScratchArgs.clear(); + return &E->getValue(); + } + + E = (llvm::FoldingSetNodeWrapper<ArgEffects>*) + BPAlloc.Allocate<llvm::FoldingSetNodeWrapper<ArgEffects> >(); + + new (E) llvm::FoldingSetNodeWrapper<ArgEffects>(ScratchArgs); + AESet.InsertNode(E, InsertPos); + + ScratchArgs.clear(); + return &E->getValue(); +} + +CFRefSummary* CFRefSummaryManager::getPersistentSummary(ArgEffects* AE, + RetEffect RE) { + + llvm::FoldingSetNodeID profile; + CFRefSummary::Profile(profile, AE, RE); + void* InsertPos; + + CFRefSummary* Summ = SummarySet.FindNodeOrInsertPos(profile, InsertPos); + + if (Summ) + return Summ; + + Summ = (CFRefSummary*) BPAlloc.Allocate<CFRefSummary>(); + new (Summ) CFRefSummary(AE, RE); + SummarySet.InsertNode(Summ, InsertPos); + + return Summ; +} + + +CFRefSummary* CFRefSummaryManager::getSummary(FunctionDecl* FD, + ASTContext& Ctx) { + + SourceLocation Loc = FD->getLocation(); + + if (!Loc.isFileID()) + return NULL; + + { // Look into our cache of summaries to see if we have already computed + // a summary for this FunctionDecl. 
+ + SummaryMapTy::iterator I = SummaryMap.find(FD); + + if (I != SummaryMap.end()) + return I->second; + } + +#if 0 + SourceManager& SrcMgr = Ctx.getSourceManager(); + unsigned fid = Loc.getFileID(); + const FileEntry* FE = SrcMgr.getFileEntryForID(fid); + + if (!FE) + return NULL; + + const char* DirName = FE->getDir()->getName(); + assert (DirName); + assert (strlen(DirName) > 0); + + if (!strstr(DirName, "CoreFoundation")) { + SummaryMap[FD] = NULL; + return NULL; + } +#endif + + const char* FName = FD->getIdentifier()->getName(); + + if (FName[0] == 'C' && FName[1] == 'F') { + CFRefSummary* S = getCFSummary(FD, FName); + SummaryMap[FD] = S; + return S; + } + + return NULL; +} + +CFRefSummary* CFRefSummaryManager::getCFSummary(FunctionDecl* FD, + const char* FName) { + + // For now, only generate summaries for functions that have a prototype. + + FunctionTypeProto* FT = + dyn_cast<FunctionTypeProto>(FD->getType().getTypePtr()); + + if (!FT) + return NULL; + + FName += 2; + + if (strcmp(FName, "Retain") == 0) + return getCannedCFSummary(FT, true); + + if (strcmp(FName, "Release") == 0) + return getCannedCFSummary(FT, false); + + assert (ScratchArgs.empty()); + bool usesCreateRule = false; + + if (strstr(FName, "Create")) + usesCreateRule = true; + + if (!usesCreateRule && strstr(FName, "Copy")) + usesCreateRule = true; + + if (usesCreateRule) + return getCFSummaryCreateRule(FT); + + if (strstr(FName, "Get")) + return getCFSummaryGetRule(FT); + + return NULL; +} + +CFRefSummary* CFRefSummaryManager::getCannedCFSummary(FunctionTypeProto* FT, + bool isRetain) { + + if (FT->getNumArgs() != 1) + return NULL; + + TypedefType* ArgT = dyn_cast<TypedefType>(FT->getArgType(0).getTypePtr()); + + if (!ArgT) + return NULL; + + // For CFRetain/CFRelease, the first (and only) argument is of type + // "CFTypeRef". 
+ + const char* TDName = ArgT->getDecl()->getIdentifier()->getName(); + assert (TDName); + + if (strcmp("CFTypeRef", TDName) == 0) + return NULL; + + if (!ArgT->isPointerType()) + return NULL; + + // Check the return type. It should also be "CFTypeRef". + + QualType RetTy = FT->getResultType(); + + if (RetTy.getTypePtr() != ArgT) + return NULL; + + // The function's interface checks out. Generate a canned summary. + + assert (ScratchArgs.empty()); + ScratchArgs.push_back(isRetain ? IncRef : DecRef); + + return getPersistentSummary(getArgEffects(), RetEffect::MakeAlias(0)); +} + +static bool isCFRefType(QualType T) { + + if (!T->isPointerType()) + return false; + + // Check the typedef for the name "CF" and the substring "Ref". + + TypedefType* TD = dyn_cast<TypedefType>(T.getTypePtr()); + + if (!TD) + return false; + + const char* TDName = TD->getDecl()->getIdentifier()->getName(); + assert (TDName); + + if (TDName[0] != 'C' || TDName[1] != 'F') + return false; + + if (strstr(TDName, "Ref") == 0) + return false; + + return true; +} + + +CFRefSummary* +CFRefSummaryManager::getCFSummaryCreateRule(FunctionTypeProto* FT) { + + if (!isCFRefType(FT->getResultType())) + return NULL; + + assert (ScratchArgs.empty()); + + // FIXME: Add special-cases for functions that retain/release. For now + // just handle the default case. + + for (unsigned i = 0, n = FT->getNumArgs(); i != n; ++i) + ScratchArgs.push_back(DoNothing); + + return getPersistentSummary(getArgEffects(), RetEffect::MakeOwned()); +} + +CFRefSummary* +CFRefSummaryManager::getCFSummaryGetRule(FunctionTypeProto* FT) { + + if (!isCFRefType(FT->getResultType())) + return NULL; + + assert (ScratchArgs.empty()); + + // FIXME: Add special-cases for functions that retain/release. For now + // just handle the default case. 
+ + for (unsigned i = 0, n = FT->getNumArgs(); i != n; ++i) + ScratchArgs.push_back(DoNothing); + + return getPersistentSummary(getArgEffects(), RetEffect::MakeNotOwned()); +} + +//===----------------------------------------------------------------------===// +// Transfer functions. +//===----------------------------------------------------------------------===// + +namespace { + +class RefVal { + unsigned Data; + + RefVal(unsigned K, unsigned D) : Data((D << 3) | K) { + assert ((K & ~0x5) == 0x0); + } + + RefVal(unsigned K) : Data(K) { + assert ((K & ~0x5) == 0x0); + } + +public: + enum Kind { Owned = 0, AcqOwned = 1, NotOwned = 2, Released = 3, + ErrorUseAfterRelease = 4, ErrorReleaseNotOwned = 5 }; + + + Kind getKind() const { return (Kind) (Data & 0x5); } + + unsigned getCount() const { + assert (getKind() == Owned || getKind() == AcqOwned); + return Data >> 3; + } + + static bool isError(Kind k) { return k >= ErrorUseAfterRelease; } + + static RefVal makeOwned(unsigned Count) { return RefVal(Owned, Count); } + static RefVal makeAcqOwned(unsigned Count) { return RefVal(AcqOwned, Count); } + static RefVal makeNotOwned() { return RefVal(NotOwned); } + static RefVal makeReleased() { return RefVal(Released); } + static RefVal makeUseAfterRelease() { return RefVal(ErrorUseAfterRelease); } + static RefVal makeReleaseNotOwned() { return RefVal(ErrorReleaseNotOwned); } + + bool operator==(const RefVal& X) const { return Data == X.Data; } + void Profile(llvm::FoldingSetNodeID& ID) const { ID.AddInteger(Data); } + + void print(std::ostream& Out) const; +}; + +void RefVal::print(std::ostream& Out) const { + switch (getKind()) { + default: assert(false); + case Owned: + Out << "Owned(" << getCount() << ")"; + break; + + case AcqOwned: + Out << "Acquired-Owned(" << getCount() << ")"; + break; + + case NotOwned: + Out << "Not-Owned"; + break; + + case Released: + Out << "Released"; + break; + + case ErrorUseAfterRelease: + Out << "Use-After-Release [ERROR]"; + break; + + 
case ErrorReleaseNotOwned: + Out << "Release of Not-Owned [ERROR]"; + break; + } +} + +class CFRefCount : public GRSimpleVals { + + // Type definitions. + + typedef llvm::ImmutableMap<SymbolID, RefVal> RefBindings; + typedef RefBindings::Factory RefBFactoryTy; + + typedef llvm::SmallPtrSet<GRExprEngine::NodeTy*,2> UseAfterReleasesTy; + typedef llvm::SmallPtrSet<GRExprEngine::NodeTy*,2> ReleasesNotOwnedTy; + + class BindingsPrinter : public ValueState::CheckerStatePrinter { + public: + virtual void PrintCheckerState(std::ostream& Out, void* State, + const char* nl, const char* sep); + }; + + // Instance variables. + + CFRefSummaryManager Summaries; + RefBFactoryTy RefBFactory; + + UseAfterReleasesTy UseAfterReleases; + ReleasesNotOwnedTy ReleasesNotOwned; + + BindingsPrinter Printer; + + // Private methods. + + static RefBindings GetRefBindings(ValueState& StImpl) { + return RefBindings((RefBindings::TreeTy*) StImpl.CheckerState); + } + + static void SetRefBindings(ValueState& StImpl, RefBindings B) { + StImpl.CheckerState = B.getRoot(); + } + + RefBindings Remove(RefBindings B, SymbolID sym) { + return RefBFactory.Remove(B, sym); + } + + RefBindings Update(RefBindings B, SymbolID sym, RefVal V, ArgEffect E, + RefVal::Kind& hasError); + + +public: + CFRefCount() {} + virtual ~CFRefCount() {} + + virtual ValueState::CheckerStatePrinter* getCheckerStatePrinter() { + return &Printer; + } + + // Calls. 
+ + virtual void EvalCall(ExplodedNodeSet<ValueState>& Dst, + GRExprEngine& Eng, + GRStmtNodeBuilder<ValueState>& Builder, + CallExpr* CE, LVal L, + ExplodedNode<ValueState>* Pred); +}; + +} // end anonymous namespace + +void CFRefCount::BindingsPrinter::PrintCheckerState(std::ostream& Out, + void* State, const char* nl, + const char* sep) { + RefBindings B((RefBindings::TreeTy*) State); + + if (State) + Out << sep << nl; + + for (RefBindings::iterator I=B.begin(), E=B.end(); I!=E; ++I) { + Out << (*I).first << " : "; + (*I).second.print(Out); + Out << nl; + } +} + +void CFRefCount::EvalCall(ExplodedNodeSet<ValueState>& Dst, + GRExprEngine& Eng, + GRStmtNodeBuilder<ValueState>& Builder, + CallExpr* CE, LVal L, + ExplodedNode<ValueState>* Pred) { + + ValueStateManager& StateMgr = Eng.getStateManager(); + + // FIXME: Support calls to things other than lval::FuncVal. At the very + // least we should stop tracking ref-state for ref-counted objects passed + // to these functions. + + assert (isa<lval::FuncVal>(L) && "Not yet implemented."); + + // Get the summary. + + lval::FuncVal FV = cast<lval::FuncVal>(L); + FunctionDecl* FD = FV.getDecl(); + CFRefSummary* Summ = Summaries.getSummary(FD, Eng.getContext()); + + // Get the state. + + ValueState* St = Builder.GetState(Pred); + + // Evaluate the effects of the call. + + ValueState StVals = *St; + RefVal::Kind hasError = (RefVal::Kind) 0; + + if (!Summ) { + + // This function has no summary. Invalidate all reference-count state + // for arguments passed to this function, and also nuke the values of + // arguments passed-by-reference. 
+ + ValueState StVals = *St; + + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + + RVal V = StateMgr.GetRVal(St, *I); + + if (isa<lval::SymbolVal>(V)) { + SymbolID Sym = cast<lval::SymbolVal>(V).getSymbol(); + RefBindings B = GetRefBindings(StVals); + SetRefBindings(StVals, Remove(B, Sym)); + } + + if (isa<LVal>(V)) + StateMgr.Unbind(StVals, cast<LVal>(V)); + } + + St = StateMgr.getPersistentState(StVals); + + // Make up a symbol for the return value of this function. + + if (CE->getType() != Eng.getContext().VoidTy) { + unsigned Count = Builder.getCurrentBlockCount(); + SymbolID Sym = Eng.getSymbolManager().getConjuredSymbol(CE, Count); + + RVal X = CE->getType()->isPointerType() + ? cast<RVal>(lval::SymbolVal(Sym)) + : cast<RVal>(nonlval::SymbolVal(Sym)); + + St = StateMgr.SetRVal(St, CE, X, Eng.getCFG().isBlkExpr(CE), false); + } + + Builder.Nodify(Dst, CE, Pred, St); + return; + } + + // This function has a summary. Evaluate the effect of the arguments. + + unsigned idx = 0; + + for (CallExpr::arg_iterator I=CE->arg_begin(), E=CE->arg_end(); + I!=E; ++I, ++idx) { + + RVal V = StateMgr.GetRVal(St, *I); + + if (isa<lval::SymbolVal>(V)) { + SymbolID Sym = cast<lval::SymbolVal>(V).getSymbol(); + RefBindings B = GetRefBindings(StVals); + + if (RefBindings::TreeTy* T = B.SlimFind(Sym)) { + B = Update(B, Sym, T->getValue().second, Summ->getArg(idx), hasError); + SetRefBindings(StVals, B); + if (hasError) break; + } + } + } + + if (hasError) { + St = StateMgr.getPersistentState(StVals); + GRExprEngine::NodeTy* N = Builder.generateNode(CE, St, Pred); + + if (N) { + N->markAsSink(); + + switch (hasError) { + default: assert(false); + case RefVal::ErrorUseAfterRelease: + UseAfterReleases.insert(N); + break; + + case RefVal::ErrorReleaseNotOwned: + ReleasesNotOwned.insert(N); + break; + } + } + + return; + } + + // Finally, consult the summary for the return value. 
+ + RetEffect RE = Summ->getRet(); + St = StateMgr.getPersistentState(StVals); + + + switch (RE.getKind()) { + default: + assert (false && "Unhandled RetEffect."); break; + + case RetEffect::Alias: { + unsigned idx = RE.getValue(); + assert (idx < CE->getNumArgs()); + RVal V = StateMgr.GetRVal(St, CE->getArg(idx)); + St = StateMgr.SetRVal(St, CE, V, Eng.getCFG().isBlkExpr(CE), false); + break; + } + + case RetEffect::OwnedSymbol: { + unsigned Count = Builder.getCurrentBlockCount(); + SymbolID Sym = Eng.getSymbolManager().getConjuredSymbol(CE, Count); + + ValueState StImpl = *St; + RefBindings B = GetRefBindings(StImpl); + SetRefBindings(StImpl, RefBFactory.Add(B, Sym, RefVal::makeOwned(1))); + + St = StateMgr.SetRVal(StateMgr.getPersistentState(StImpl), + CE, lval::SymbolVal(Sym), + Eng.getCFG().isBlkExpr(CE), false); + + break; + } + + case RetEffect::NotOwnedSymbol: { + unsigned Count = Builder.getCurrentBlockCount(); + SymbolID Sym = Eng.getSymbolManager().getConjuredSymbol(CE, Count); + + ValueState StImpl = *St; + RefBindings B = GetRefBindings(StImpl); + SetRefBindings(StImpl, RefBFactory.Add(B, Sym, RefVal::makeNotOwned())); + + St = StateMgr.SetRVal(StateMgr.getPersistentState(StImpl), + CE, lval::SymbolVal(Sym), + Eng.getCFG().isBlkExpr(CE), false); + + break; + } + } + + Builder.Nodify(Dst, CE, Pred, St); +} + + +CFRefCount::RefBindings CFRefCount::Update(RefBindings B, SymbolID sym, + RefVal V, ArgEffect E, + RefVal::Kind& hasError) { + + // FIXME: This dispatch can potentially be sped up by unifiying it into + // a single switch statement. Opt for simplicity for now. 
+
+  switch (E) {
+    default:
+      assert (false && "Unhandled CFRef transition.");
+
+    case DoNothing:
+      // Merely passing a released object to a call is a use-after-release.
+      if (V.getKind() == RefVal::Released) {
+        V = RefVal::makeUseAfterRelease();
+        hasError = V.getKind();
+        break;
+      }
+
+      // Nothing changed, so the bindings can be returned as they are.
+      return B;
+
+    case IncRef:
+      switch (V.getKind()) {
+        default:
+          assert(false);
+
+        case RefVal::Owned:
+          V = RefVal::makeOwned(V.getCount()+1); break;
+
+        case RefVal::AcqOwned:
+          V = RefVal::makeAcqOwned(V.getCount()+1);
+          break;
+
+        case RefVal::NotOwned:
+          V = RefVal::makeAcqOwned(1);
+          break;
+
+        case RefVal::Released:
+          V = RefVal::makeUseAfterRelease();
+          hasError = V.getKind();
+          break;
+      }
+
+      // Leave the outer switch here. Without this break control falls into
+      // DecRef, which undoes the increment just applied (or asserts on an
+      // error kind).
+      break;
+
+    case DecRef:
+      switch (V.getKind()) {
+        default:
+          assert (false);
+
+        case RefVal::Owned: {
+          unsigned Count = V.getCount() - 1;
+          V = Count ? RefVal::makeOwned(Count) : RefVal::makeReleased();
+          break;
+        }
+
+        case RefVal::AcqOwned: {
+          unsigned Count = V.getCount() - 1;
+          V = Count ? RefVal::makeAcqOwned(Count) : RefVal::makeNotOwned();
+          break;
+        }
+
+        case RefVal::NotOwned:
+          V = RefVal::makeReleaseNotOwned();
+          hasError = V.getKind();
+          break;
+
+        case RefVal::Released:
+          V = RefVal::makeUseAfterRelease();
+          hasError = V.getKind();
+          break;
+      }
+
+      break;
+  }
+
+  // Record the new value for the symbol in a fresh bindings map.
+  return RefBFactory.Add(B, sym, V);
+}
+
+//===----------------------------------------------------------------------===//
+// Driver for the CFRefCount Checker.
+//===----------------------------------------------------------------------===//
+
+namespace clang {
+
+  void CheckCFRefCount(CFG& cfg, Decl& CD, ASTContext& Ctx,
+                       Diagnostic& Diag) {
+
+    if (Diag.hasErrorOccurred())
+      return;
+
+    // FIXME: Refactor some day so this becomes a single function invocation.
+ + GRCoreEngine<GRExprEngine> Eng(cfg, CD, Ctx); + GRExprEngine* CS = &Eng.getCheckerState(); + CFRefCount TF; + CS->setTransferFunctions(TF); + Eng.ExecuteWorkList(20000); + + } + +} // end clang namespace diff --git a/clang/lib/Analysis/DeadStores.cpp b/clang/lib/Analysis/DeadStores.cpp new file mode 100644 index 00000000000..0848336e586 --- /dev/null +++ b/clang/lib/Analysis/DeadStores.cpp @@ -0,0 +1,87 @@ +//==- DeadStores.cpp - Check for stores to dead variables --------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a DeadStores, a flow-sensitive checker that looks for +// stores to variables that are no longer live. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/LocalCheckers.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/Visitors/CFGRecStmtVisitor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/ASTContext.h" +#include "llvm/Support/Compiler.h" + +using namespace clang; + +namespace { + +class VISIBILITY_HIDDEN DeadStoreObs : public LiveVariables::ObserverTy { + ASTContext &Ctx; + Diagnostic &Diags; +public: + DeadStoreObs(ASTContext &ctx,Diagnostic &diags) : Ctx(ctx), Diags(diags){} + virtual ~DeadStoreObs() {} + + virtual void ObserveStmt(Stmt* S, + const LiveVariables::AnalysisDataTy& AD, + const LiveVariables::ValTy& Live) { + + if (BinaryOperator* B = dyn_cast<BinaryOperator>(S)) { + if (!B->isAssignmentOp()) return; // Skip non-assignments. 
+ + if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(B->getLHS())) + if (VarDecl* VD = dyn_cast<VarDecl>(DR->getDecl())) + if (VD->hasLocalStorage() && !Live(VD,AD)) { + SourceRange R = B->getRHS()->getSourceRange(); + Diags.Report(Ctx.getFullLoc(DR->getSourceRange().getBegin()), + diag::warn_dead_store, 0, 0, &R, 1); + } + } + else if(DeclStmt* DS = dyn_cast<DeclStmt>(S)) + // Iterate through the decls. Warn if any initializers are complex + // expressions that are not live (never used). + for (VarDecl* V = cast<VarDecl>(DS->getDecl()); V != NULL ; + V = cast_or_null<VarDecl>(V->getNextDeclarator())) { + if (V->hasLocalStorage()) + if (Expr* E = V->getInit()) { + if (!Live(DS->getDecl(),AD)) { + // Special case: check for initializations with constants. + // + // e.g. : int x = 0; + // + // If x is EVER assigned a new value later, don't issue + // a warning. This is because such initialization can be + // due to defensive programming. + if (!E->isConstantExpr(Ctx,NULL)) { + // Flag a warning. + SourceRange R = E->getSourceRange(); + Diags.Report(Ctx.getFullLoc(V->getLocation()), + diag::warn_dead_store, 0, 0, &R, 1); + } + } + } + } + } +}; + +} // end anonymous namespace + +namespace clang { + +void CheckDeadStores(CFG& cfg, ASTContext &Ctx, Diagnostic &Diags) { + + LiveVariables L(cfg); + L.runOnCFG(cfg); + DeadStoreObs A(Ctx, Diags); + L.runOnAllBlocks(cfg, &A); +} + +} // end namespace clang diff --git a/clang/lib/Analysis/ExplodedGraph.cpp b/clang/lib/Analysis/ExplodedGraph.cpp new file mode 100644 index 00000000000..2ba46d77d63 --- /dev/null +++ b/clang/lib/Analysis/ExplodedGraph.cpp @@ -0,0 +1,227 @@ +//=-- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -*- C++ -*------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the template classes ExplodedNode and ExplodedGraph, +// which represent a path-sensitive, intra-procedural "exploded graph." +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/ExplodedGraph.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include <vector> +#include <list> + +using namespace clang; + + +static inline std::vector<ExplodedNodeImpl*>& getVector(void* P) { + return *reinterpret_cast<std::vector<ExplodedNodeImpl*>*>(P); +} + +void ExplodedNodeImpl::NodeGroup::addNode(ExplodedNodeImpl* N) { + + assert ((reinterpret_cast<uintptr_t>(N) & Mask) == 0x0); + assert (!getFlag()); + + if (getKind() == Size1) { + if (ExplodedNodeImpl* NOld = getNode()) { + std::vector<ExplodedNodeImpl*>* V = new std::vector<ExplodedNodeImpl*>(); + assert ((reinterpret_cast<uintptr_t>(V) & Mask) == 0x0); + V->push_back(NOld); + V->push_back(N); + P = reinterpret_cast<uintptr_t>(V) | SizeOther; + assert (getPtr() == (void*) V); + assert (getKind() == SizeOther); + } + else { + P = reinterpret_cast<uintptr_t>(N); + assert (getKind() == Size1); + } + } + else { + assert (getKind() == SizeOther); + getVector(getPtr()).push_back(N); + } +} + + +unsigned ExplodedNodeImpl::NodeGroup::size() const { + if (getFlag()) + return 0; + + if (getKind() == Size1) + return getNode() ? 1 : 0; + else + return getVector(getPtr()).size(); +} + +ExplodedNodeImpl** ExplodedNodeImpl::NodeGroup::begin() const { + if (getFlag()) + return NULL; + + if (getKind() == Size1) + return (ExplodedNodeImpl**) (getPtr() ? 
&P : NULL); + else + return const_cast<ExplodedNodeImpl**>(&*(getVector(getPtr()).begin())); +} + +ExplodedNodeImpl** ExplodedNodeImpl::NodeGroup::end() const { + if (getFlag()) + return NULL; + + if (getKind() == Size1) + return (ExplodedNodeImpl**) (getPtr() ? &P+1 : NULL); + else + return const_cast<ExplodedNodeImpl**>(&*(getVector(getPtr()).end())); +} + +ExplodedNodeImpl::NodeGroup::~NodeGroup() { + if (getKind() == SizeOther) delete &getVector(getPtr()); +} + +ExplodedGraphImpl* ExplodedGraphImpl::Trim(ExplodedNodeImpl** BeginSources, + ExplodedNodeImpl** EndSources) const{ + + typedef llvm::DenseMap<ExplodedNodeImpl*, ExplodedNodeImpl*> Pass1Ty; + typedef llvm::DenseMap<ExplodedNodeImpl*, ExplodedNodeImpl*> Pass2Ty; + + Pass1Ty Pass1; + Pass2Ty Pass2; + + llvm::SmallVector<ExplodedNodeImpl*, 10> WL2; + + { // ===- Pass 1 (reverse BFS) -=== + + // Enqueue the source nodes to the first worklist. + + std::list<std::pair<ExplodedNodeImpl*, ExplodedNodeImpl*> > WL1; + + for (ExplodedNodeImpl** I = BeginSources; I != EndSources; ++I) + WL1.push_back(std::make_pair(*I, *I)); + + // Process the worklist. 
+ + while (!WL1.empty()) { + + ExplodedNodeImpl* N = WL1.back().first; + ExplodedNodeImpl* Src = WL1.back().second; + + WL1.pop_back(); + + if (Pass1.find(N) != Pass1.end()) + continue; + + bool PredHasSameSource = false; + bool VisitPreds = true; + + for (ExplodedNodeImpl** I=N->Preds.begin(), **E=N->Preds.end(); + I!=E; ++I) { + + Pass1Ty::iterator pi = Pass1.find(*I); + + if (pi == Pass1.end()) + continue; + + VisitPreds = false; + + if (pi->second == Src) { + PredHasSameSource = true; + break; + } + } + + if (VisitPreds || !PredHasSameSource) { + + Pass1[N] = Src; + + if (N->Preds.empty()) { + WL2.push_back(N); + continue; + } + } + else + Pass1[N] = NULL; + + if (VisitPreds) + for (ExplodedNodeImpl** I=N->Preds.begin(), **E=N->Preds.end(); + I!=E; ++I) + WL1.push_front(std::make_pair(*I, Src)); + } + } + + if (WL2.empty()) + return NULL; + + ExplodedGraphImpl* G = MakeEmptyGraph(); + + // ===- Pass 2 (forward DFS to construct the new graph) -=== + + while (!WL2.empty()) { + + ExplodedNodeImpl* N = WL2.back(); + WL2.pop_back(); + + // Skip this node if we have already processed it. + + if (Pass2.find(N) != Pass2.end()) + continue; + + // Create the corresponding node in the new graph. + + ExplodedNodeImpl* NewN = G->getNodeImpl(N->getLocation(), N->State, NULL); + Pass2[N] = NewN; + + if (N->Preds.empty()) + G->addRoot(NewN); + + // In the case that some of the intended predecessors of NewN have already + // been created, we should hook them up as predecessors. + + for (ExplodedNodeImpl **I=N->Preds.begin(), **E=N->Preds.end(); I!=E; ++I) { + + Pass2Ty::iterator PI = Pass2.find(*I); + + if (PI == Pass2.end()) + continue; + + NewN->addPredecessor(PI->second); + } + + // In the case that some of the intended successors of NewN have already + // been created, we should hook them up as successors. Otherwise, enqueue + // the new nodes from the original graph that should have nodes created + // in the new graph. 
+ + for (ExplodedNodeImpl **I=N->Succs.begin(), **E=N->Succs.end(); I!=E; ++I) { + + Pass2Ty::iterator PI = Pass2.find(*I); + + if (PI != Pass2.end()) { + PI->second->addPredecessor(NewN); + continue; + } + + // Enqueue nodes to the worklist that were marked during pass 1. + + Pass1Ty::iterator pi = Pass1.find(*I); + + if (pi == Pass1.end() || pi->second == NULL) + continue; + + WL2.push_back(*I); + } + + if (N->isSink()) + NewN->markAsSink(); + } + + return G; +} diff --git a/clang/lib/Analysis/GRBlockCounter.cpp b/clang/lib/Analysis/GRBlockCounter.cpp new file mode 100644 index 00000000000..3ecc39d3224 --- /dev/null +++ b/clang/lib/Analysis/GRBlockCounter.cpp @@ -0,0 +1,54 @@ +//==- GRBlockCounter.h - ADT for counting block visits -------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines GRBlockCounter, an abstract data type used to count +// the number of times a given block has been visited along a path +// analyzed by GRCoreEngine. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/GRBlockCounter.h" +#include "llvm/ADT/ImmutableMap.h" + +using namespace clang; + +typedef llvm::ImmutableMap<unsigned,unsigned> CountMap; + +static inline CountMap GetMap(void* D) { + return CountMap(static_cast<CountMap::TreeTy*>(D)); +} + +static inline CountMap::Factory& GetFactory(void* F) { + return *static_cast<CountMap::Factory*>(F); +} + +unsigned GRBlockCounter::getNumVisited(unsigned BlockID) const { + CountMap M = GetMap(Data); + CountMap::TreeTy* T = M.SlimFind(BlockID); + return T ? 
T->getValue().second : 0; +} + +GRBlockCounter::Factory::Factory(llvm::BumpPtrAllocator& Alloc) { + F = new CountMap::Factory(Alloc); +} + +GRBlockCounter::Factory::~Factory() { + delete static_cast<CountMap::Factory*>(F); +} + +GRBlockCounter +GRBlockCounter::Factory::IncrementCount(GRBlockCounter BC, unsigned BlockID) { + return GRBlockCounter(GetFactory(F).Add(GetMap(BC.Data), BlockID, + BC.getNumVisited(BlockID)+1).getRoot()); +} + +GRBlockCounter +GRBlockCounter::Factory::GetEmptyCounter() { + return GRBlockCounter(GetFactory(F).GetEmptyMap().getRoot()); +} diff --git a/clang/lib/Analysis/GRCoreEngine.cpp b/clang/lib/Analysis/GRCoreEngine.cpp new file mode 100644 index 00000000000..53831ed06d5 --- /dev/null +++ b/clang/lib/Analysis/GRCoreEngine.cpp @@ -0,0 +1,444 @@ +//==- GRCoreEngine.cpp - Path-Sensitive Dataflow Engine ----------------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic engine for intraprocedural, path-sensitive, +// dataflow analysis via graph reachability engine. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/GRCoreEngine.h" +#include "clang/AST/Expr.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Casting.h" +#include "llvm/ADT/DenseMap.h" +#include <vector> + +using llvm::cast; +using llvm::isa; +using namespace clang; + +namespace { + class VISIBILITY_HIDDEN DFS : public GRWorkList { + llvm::SmallVector<GRWorkListUnit,20> Stack; +public: + virtual bool hasWork() const { + return !Stack.empty(); + } + + virtual void Enqueue(const GRWorkListUnit& U) { + Stack.push_back(U); + } + + virtual GRWorkListUnit Dequeue() { + assert (!Stack.empty()); + const GRWorkListUnit& U = Stack.back(); + Stack.pop_back(); // This technically "invalidates" U, but we are fine. + return U; + } +}; +} // end anonymous namespace + +// Place the dstor for GRWorkList here because it contains virtual member +// functions, and we the code for the dstor generated in one compilation unit. +GRWorkList::~GRWorkList() {} + +GRWorkList* GRWorkList::MakeDFS() { return new DFS(); } + +/// ExecuteWorkList - Run the worklist algorithm for a maximum number of steps. +bool GRCoreEngineImpl::ExecuteWorkList(unsigned Steps) { + + if (G->num_roots() == 0) { // Initialize the analysis by constructing + // the root if none exists. + + CFGBlock* Entry = &getCFG().getEntry(); + + assert (Entry->empty() && + "Entry block must be empty."); + + assert (Entry->succ_size() == 1 && + "Entry block must have 1 successor."); + + // Get the solitary successor. + CFGBlock* Succ = *(Entry->succ_begin()); + + // Construct an edge representing the + // starting location in the function. + BlockEdge StartLoc(getCFG(), Entry, Succ); + + // Set the current block counter to being empty. + WList->setBlockCounter(BCounterFactory.GetEmptyCounter()); + + // Generate the root. 
+ GenerateNode(StartLoc, getInitialState()); + } + + while (Steps && WList->hasWork()) { + --Steps; + const GRWorkListUnit& WU = WList->Dequeue(); + + // Set the current block counter. + WList->setBlockCounter(WU.getBlockCounter()); + + // Retrieve the node. + ExplodedNodeImpl* Node = WU.getNode(); + + // Dispatch on the location type. + switch (Node->getLocation().getKind()) { + default: + assert (isa<BlockEdge>(Node->getLocation())); + HandleBlockEdge(cast<BlockEdge>(Node->getLocation()), Node); + break; + + case ProgramPoint::BlockEntranceKind: + HandleBlockEntrance(cast<BlockEntrance>(Node->getLocation()), Node); + break; + + case ProgramPoint::BlockExitKind: + assert (false && "BlockExit location never occur in forward analysis."); + break; + + case ProgramPoint::PostStmtKind: + HandlePostStmt(cast<PostStmt>(Node->getLocation()), WU.getBlock(), + WU.getIndex(), Node); + break; + } + } + + return WList->hasWork(); +} + +void GRCoreEngineImpl::HandleBlockEdge(const BlockEdge& L, + ExplodedNodeImpl* Pred) { + + CFGBlock* Blk = L.getDst(); + + // Check if we are entering the EXIT block. + if (Blk == &getCFG().getExit()) { + + assert (getCFG().getExit().size() == 0 + && "EXIT block cannot contain Stmts."); + + // Process the final state transition. + void* State = ProcessEOP(Blk, Pred->State); + + bool IsNew; + ExplodedNodeImpl* Node = G->getNodeImpl(BlockEntrance(Blk), State, &IsNew); + Node->addPredecessor(Pred); + + // If the node was freshly created, mark it as an "End-Of-Path" node. + if (IsNew) G->addEndOfPath(Node); + + // This path is done. Don't enqueue any more nodes. + return; + } + + // FIXME: Should we allow ProcessBlockEntrance to also manipulate state? + + if (ProcessBlockEntrance(Blk, Pred->State, WList->getBlockCounter())) + GenerateNode(BlockEntrance(Blk), Pred->State, Pred); +} + +void GRCoreEngineImpl::HandleBlockEntrance(const BlockEntrance& L, + ExplodedNodeImpl* Pred) { + + // Increment the block counter. 
+ GRBlockCounter Counter = WList->getBlockCounter(); + Counter = BCounterFactory.IncrementCount(Counter, L.getBlock()->getBlockID()); + WList->setBlockCounter(Counter); + + // Process the entrance of the block. + if (Stmt* S = L.getFirstStmt()) { + GRStmtNodeBuilderImpl Builder(L.getBlock(), 0, Pred, this); + ProcessStmt(S, Builder); + } + else + HandleBlockExit(L.getBlock(), Pred); +} + + +void GRCoreEngineImpl::HandleBlockExit(CFGBlock * B, ExplodedNodeImpl* Pred) { + + if (Stmt* Term = B->getTerminator()) { + switch (Term->getStmtClass()) { + default: + assert(false && "Analysis for this terminator not implemented."); + break; + + case Stmt::BinaryOperatorClass: // '&&' and '||' + HandleBranch(cast<BinaryOperator>(Term)->getLHS(), Term, B, Pred); + return; + + case Stmt::ConditionalOperatorClass: + HandleBranch(cast<ConditionalOperator>(Term)->getCond(), Term, B, Pred); + return; + + // FIXME: Use constant-folding in CFG construction to simplify this + // case. + + case Stmt::ChooseExprClass: + HandleBranch(cast<ChooseExpr>(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::DoStmtClass: + HandleBranch(cast<DoStmt>(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::ForStmtClass: + HandleBranch(cast<ForStmt>(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::ContinueStmtClass: + case Stmt::BreakStmtClass: + case Stmt::GotoStmtClass: + break; + + case Stmt::IfStmtClass: + HandleBranch(cast<IfStmt>(Term)->getCond(), Term, B, Pred); + return; + + case Stmt::IndirectGotoStmtClass: { + // Only 1 successor: the indirect goto dispatch block. 
+ assert (B->succ_size() == 1); + + GRIndirectGotoNodeBuilderImpl + builder(Pred, B, cast<IndirectGotoStmt>(Term)->getTarget(), + *(B->succ_begin()), this); + + ProcessIndirectGoto(builder); + return; + } + + case Stmt::SwitchStmtClass: { + GRSwitchNodeBuilderImpl builder(Pred, B, + cast<SwitchStmt>(Term)->getCond(), + this); + + ProcessSwitch(builder); + return; + } + + case Stmt::WhileStmtClass: + HandleBranch(cast<WhileStmt>(Term)->getCond(), Term, B, Pred); + return; + } + } + + assert (B->succ_size() == 1 && + "Blocks with no terminator should have at most 1 successor."); + + GenerateNode(BlockEdge(getCFG(),B,*(B->succ_begin())), Pred->State, Pred); +} + +void GRCoreEngineImpl::HandleBranch(Expr* Cond, Stmt* Term, CFGBlock * B, + ExplodedNodeImpl* Pred) { + assert (B->succ_size() == 2); + + GRBranchNodeBuilderImpl Builder(B, *(B->succ_begin()), *(B->succ_begin()+1), + Pred, this); + + ProcessBranch(Cond, Term, Builder); +} + +void GRCoreEngineImpl::HandlePostStmt(const PostStmt& L, CFGBlock* B, + unsigned StmtIdx, ExplodedNodeImpl* Pred) { + + assert (!B->empty()); + + if (StmtIdx == B->size()) + HandleBlockExit(B, Pred); + else { + GRStmtNodeBuilderImpl Builder(B, StmtIdx, Pred, this); + ProcessStmt((*B)[StmtIdx], Builder); + } +} + +typedef llvm::DenseMap<Stmt*,Stmt*> ParentMapTy; +/// PopulateParentMap - Recurse the AST starting at 'Parent' and add the +/// mappings between child and parent to ParentMap. +static void PopulateParentMap(Stmt* Parent, ParentMapTy& M) { + for (Stmt::child_iterator I=Parent->child_begin(), + E=Parent->child_end(); I!=E; ++I) { + + assert (M.find(*I) == M.end()); + M[*I] = Parent; + PopulateParentMap(*I, M); + } +} + +/// GenerateNode - Utility method to generate nodes, hook up successors, +/// and add nodes to the worklist. 
+void GRCoreEngineImpl::GenerateNode(const ProgramPoint& Loc, void* State, + ExplodedNodeImpl* Pred) { + + bool IsNew; + ExplodedNodeImpl* Node = G->getNodeImpl(Loc, State, &IsNew); + + if (Pred) + Node->addPredecessor(Pred); // Link 'Node' with its predecessor. + else { + assert (IsNew); + G->addRoot(Node); // 'Node' has no predecessor. Make it a root. + } + + // Only add 'Node' to the worklist if it was freshly generated. + if (IsNew) WList->Enqueue(Node); +} + +GRStmtNodeBuilderImpl::GRStmtNodeBuilderImpl(CFGBlock* b, unsigned idx, + ExplodedNodeImpl* N, GRCoreEngineImpl* e) + : Eng(*e), B(*b), Idx(idx), Pred(N), LastNode(N), Populated(false) { + Deferred.insert(N); +} + +GRStmtNodeBuilderImpl::~GRStmtNodeBuilderImpl() { + for (DeferredTy::iterator I=Deferred.begin(), E=Deferred.end(); I!=E; ++I) + if (!(*I)->isSink()) + GenerateAutoTransition(*I); +} + +void GRStmtNodeBuilderImpl::GenerateAutoTransition(ExplodedNodeImpl* N) { + assert (!N->isSink()); + + PostStmt Loc(getStmt()); + + if (Loc == N->getLocation()) { + // Note: 'N' should be a fresh node because otherwise it shouldn't be + // a member of Deferred. + Eng.WList->Enqueue(N, B, Idx+1); + return; + } + + bool IsNew; + ExplodedNodeImpl* Succ = Eng.G->getNodeImpl(Loc, N->State, &IsNew); + Succ->addPredecessor(N); + + if (IsNew) + Eng.WList->Enqueue(Succ, B, Idx+1); +} + +ExplodedNodeImpl* GRStmtNodeBuilderImpl::generateNodeImpl(Stmt* S, void* State, + ExplodedNodeImpl* Pred) { + + bool IsNew; + ExplodedNodeImpl* N = Eng.G->getNodeImpl(PostStmt(S), State, &IsNew); + N->addPredecessor(Pred); + Deferred.erase(Pred); + + HasGeneratedNode = true; + + if (IsNew) { + Deferred.insert(N); + LastNode = N; + return N; + } + + LastNode = NULL; + return NULL; +} + +ExplodedNodeImpl* GRBranchNodeBuilderImpl::generateNodeImpl(void* State, + bool branch) { + bool IsNew; + + ExplodedNodeImpl* Succ = + Eng.G->getNodeImpl(BlockEdge(Eng.getCFG(), Src, branch ? 
DstT : DstF), + State, &IsNew); + + Succ->addPredecessor(Pred); + + if (branch) GeneratedTrue = true; + else GeneratedFalse = true; + + if (IsNew) { + Deferred.push_back(Succ); + return Succ; + } + + return NULL; +} + +GRBranchNodeBuilderImpl::~GRBranchNodeBuilderImpl() { + if (!GeneratedTrue) generateNodeImpl(Pred->State, true); + if (!GeneratedFalse) generateNodeImpl(Pred->State, false); + + for (DeferredTy::iterator I=Deferred.begin(), E=Deferred.end(); I!=E; ++I) + if (!(*I)->isSink()) Eng.WList->Enqueue(*I); +} + + +ExplodedNodeImpl* +GRIndirectGotoNodeBuilderImpl::generateNodeImpl(const Iterator& I, + void* St, + bool isSink) { + bool IsNew; + + ExplodedNodeImpl* Succ = + Eng.G->getNodeImpl(BlockEdge(Eng.getCFG(), Src, I.getBlock(), true), + St, &IsNew); + + Succ->addPredecessor(Pred); + + if (IsNew) { + + if (isSink) + Succ->markAsSink(); + else + Eng.WList->Enqueue(Succ); + + return Succ; + } + + return NULL; +} + + +ExplodedNodeImpl* +GRSwitchNodeBuilderImpl::generateCaseStmtNodeImpl(const Iterator& I, void* St) { + + bool IsNew; + + ExplodedNodeImpl* Succ = Eng.G->getNodeImpl(BlockEdge(Eng.getCFG(), Src, + I.getBlock()), + St, &IsNew); + Succ->addPredecessor(Pred); + + if (IsNew) { + Eng.WList->Enqueue(Succ); + return Succ; + } + + return NULL; +} + + +ExplodedNodeImpl* +GRSwitchNodeBuilderImpl::generateDefaultCaseNodeImpl(void* St, bool isSink) { + + // Get the block for the default case. 
+ assert (Src->succ_rbegin() != Src->succ_rend()); + CFGBlock* DefaultBlock = *Src->succ_rbegin(); + + bool IsNew; + + ExplodedNodeImpl* Succ = Eng.G->getNodeImpl(BlockEdge(Eng.getCFG(), Src, + DefaultBlock), + St, &IsNew); + Succ->addPredecessor(Pred); + + if (IsNew) { + if (isSink) + Succ->markAsSink(); + else + Eng.WList->Enqueue(Succ); + + return Succ; + } + + return NULL; +} diff --git a/clang/lib/Analysis/GRExprEngine.cpp b/clang/lib/Analysis/GRExprEngine.cpp new file mode 100644 index 00000000000..f1108df4051 --- /dev/null +++ b/clang/lib/Analysis/GRExprEngine.cpp @@ -0,0 +1,1941 @@ +//=-- GRExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ---*- C++ -*-= +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a meta-engine for path-sensitive dataflow analysis that +// is built on GREngine, but provides the boilerplate to execute transfer +// functions and build the ExplodedGraph at the expression level. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/GRExprEngine.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/Streams.h" + +#ifndef NDEBUG +#include "llvm/Support/GraphWriter.h" +#include <sstream> +#endif + +// SaveAndRestore - A utility class that uses RIIA to save and restore +// the value of a variable. 
+template<typename T> +struct VISIBILITY_HIDDEN SaveAndRestore { + SaveAndRestore(T& x) : X(x), old_value(x) {} + ~SaveAndRestore() { X = old_value; } + T get() { return old_value; } + + T& X; + T old_value; +}; + +using namespace clang; +using llvm::dyn_cast; +using llvm::cast; +using llvm::APSInt; + + +ValueState* GRExprEngine::getInitialState() { + + // The LiveVariables information already has a compilation of all VarDecls + // used in the function. Iterate through this set, and "symbolicate" + // any VarDecl whose value originally comes from outside the function. + + typedef LiveVariables::AnalysisDataTy LVDataTy; + LVDataTy& D = Liveness.getAnalysisData(); + + ValueState StateImpl = *StateMgr.getInitialState(); + + for (LVDataTy::decl_iterator I=D.begin_decl(), E=D.end_decl(); I != E; ++I) { + + VarDecl* VD = cast<VarDecl>(const_cast<ScopedDecl*>(I->first)); + + if (VD->hasGlobalStorage() || isa<ParmVarDecl>(VD)) { + RVal X = RVal::GetSymbolValue(SymMgr, VD); + StateMgr.BindVar(StateImpl, VD, X); + } + } + + return StateMgr.getPersistentState(StateImpl); +} + +ValueState* GRExprEngine::SetRVal(ValueState* St, Expr* Ex, RVal V) { + + bool isBlkExpr = false; + + if (Ex == CurrentStmt) { + isBlkExpr = getCFG().isBlkExpr(Ex); + + if (!isBlkExpr) + return St; + } + + return StateMgr.SetRVal(St, Ex, V, isBlkExpr, false); +} + +ValueState* GRExprEngine::MarkBranch(ValueState* St, Stmt* Terminator, + bool branchTaken) { + + switch (Terminator->getStmtClass()) { + default: + return St; + + case Stmt::BinaryOperatorClass: { // '&&' and '||' + + BinaryOperator* B = cast<BinaryOperator>(Terminator); + BinaryOperator::Opcode Op = B->getOpcode(); + + assert (Op == BinaryOperator::LAnd || Op == BinaryOperator::LOr); + + // For &&, if we take the true branch, then the value of the whole + // expression is that of the RHS expression. + // + // For ||, if we take the false branch, then the value of the whole + // expression is that of the RHS expression. 
+ + Expr* Ex = (Op == BinaryOperator::LAnd && branchTaken) || + (Op == BinaryOperator::LOr && !branchTaken) + ? B->getRHS() : B->getLHS(); + + return SetBlkExprRVal(St, B, UndefinedVal(Ex)); + } + + case Stmt::ConditionalOperatorClass: { // ?: + + ConditionalOperator* C = cast<ConditionalOperator>(Terminator); + + // For ?, if branchTaken == true then the value is either the LHS or + // the condition itself. (GNU extension). + + Expr* Ex; + + if (branchTaken) + Ex = C->getLHS() ? C->getLHS() : C->getCond(); + else + Ex = C->getRHS(); + + return SetBlkExprRVal(St, C, UndefinedVal(Ex)); + } + + case Stmt::ChooseExprClass: { // ?: + + ChooseExpr* C = cast<ChooseExpr>(Terminator); + + Expr* Ex = branchTaken ? C->getLHS() : C->getRHS(); + return SetBlkExprRVal(St, C, UndefinedVal(Ex)); + } + } +} + +bool GRExprEngine::ProcessBlockEntrance(CFGBlock* B, ValueState*, + GRBlockCounter BC) { + + return BC.getNumVisited(B->getBlockID()) < 3; +} + +void GRExprEngine::ProcessBranch(Expr* Condition, Stmt* Term, + BranchNodeBuilder& builder) { + + // Remove old bindings for subexpressions. + ValueState* PrevState = StateMgr.RemoveSubExprBindings(builder.getState()); + + // Check for NULL conditions; e.g. "for(;;)" + if (!Condition) { + builder.markInfeasible(false); + return; + } + + RVal V = GetRVal(PrevState, Condition); + + switch (V.getBaseKind()) { + default: + break; + + case RVal::UnknownKind: + builder.generateNode(MarkBranch(PrevState, Term, true), true); + builder.generateNode(MarkBranch(PrevState, Term, false), false); + return; + + case RVal::UndefinedKind: { + NodeTy* N = builder.generateNode(PrevState, true); + + if (N) { + N->markAsSink(); + UndefBranches.insert(N); + } + + builder.markInfeasible(false); + return; + } + } + + + // Process the true branch. 
+ + bool isFeasible = false; + ValueState* St = Assume(PrevState, V, true, isFeasible); + + if (isFeasible) + builder.generateNode(MarkBranch(St, Term, true), true); + else + builder.markInfeasible(true); + + // Process the false branch. + + isFeasible = false; + St = Assume(PrevState, V, false, isFeasible); + + if (isFeasible) + builder.generateNode(MarkBranch(St, Term, false), false); + else + builder.markInfeasible(false); +} + +/// ProcessIndirectGoto - Called by GRCoreEngine. Used to generate successor +/// nodes by processing the 'effects' of a computed goto jump. +void GRExprEngine::ProcessIndirectGoto(IndirectGotoNodeBuilder& builder) { + + ValueState* St = builder.getState(); + RVal V = GetRVal(St, builder.getTarget()); + + // Three possibilities: + // + // (1) We know the computed label. + // (2) The label is NULL (or some other constant), or Undefined. + // (3) We have no clue about the label. Dispatch to all targets. + // + + typedef IndirectGotoNodeBuilder::iterator iterator; + + if (isa<lval::GotoLabel>(V)) { + LabelStmt* L = cast<lval::GotoLabel>(V).getLabel(); + + for (iterator I=builder.begin(), E=builder.end(); I != E; ++I) { + if (I.getLabel() == L) { + builder.generateNode(I, St); + return; + } + } + + assert (false && "No block with label."); + return; + } + + if (isa<lval::ConcreteInt>(V) || isa<UndefinedVal>(V)) { + // Dispatch to the first target and mark it as a sink. + NodeTy* N = builder.generateNode(builder.begin(), St, true); + UndefBranches.insert(N); + return; + } + + // This is really a catch-all. We don't support symbolics yet. + + assert (V.isUnknown()); + + for (iterator I=builder.begin(), E=builder.end(); I != E; ++I) + builder.generateNode(I, St); +} + +/// ProcessSwitch - Called by GRCoreEngine. Used to generate successor +/// nodes by processing the 'effects' of a switch statement. 
+void GRExprEngine::ProcessSwitch(SwitchNodeBuilder& builder) { + + typedef SwitchNodeBuilder::iterator iterator; + + ValueState* St = builder.getState(); + Expr* CondE = builder.getCondition(); + RVal CondV = GetRVal(St, CondE); + + if (CondV.isUndef()) { + NodeTy* N = builder.generateDefaultCaseNode(St, true); + UndefBranches.insert(N); + return; + } + + ValueState* DefaultSt = St; + + // While most of this can be assumed (such as the signedness), having it + // just computed makes sure everything makes the same assumptions end-to-end. + + unsigned bits = getContext().getTypeSize(CondE->getType()); + + APSInt V1(bits, false); + APSInt V2 = V1; + + for (iterator I = builder.begin(), EI = builder.end(); I != EI; ++I) { + + CaseStmt* Case = cast<CaseStmt>(I.getCase()); + + // Evaluate the case. + if (!Case->getLHS()->isIntegerConstantExpr(V1, getContext(), 0, true)) { + assert (false && "Case condition must evaluate to an integer constant."); + return; + } + + // Get the RHS of the case, if it exists. + + if (Expr* E = Case->getRHS()) { + if (!E->isIntegerConstantExpr(V2, getContext(), 0, true)) { + assert (false && + "Case condition (RHS) must evaluate to an integer constant."); + return ; + } + + assert (V1 <= V2); + } + else V2 = V1; + + // FIXME: Eventually we should replace the logic below with a range + // comparison, rather than concretize the values within the range. + // This should be easy once we have "ranges" for NonLVals. + + do { + nonlval::ConcreteInt CaseVal(BasicVals.getValue(V1)); + + RVal Res = EvalBinOp(BinaryOperator::EQ, CondV, CaseVal); + + // Now "assume" that the case matches. + + bool isFeasible = false; + ValueState* StNew = Assume(St, Res, true, isFeasible); + + if (isFeasible) { + builder.generateCaseStmtNode(I, StNew); + + // If CondV evaluates to a constant, then we know that this + // is the *only* case that we can take, so stop evaluating the + // others. 
+ if (isa<nonlval::ConcreteInt>(CondV)) + return; + } + + // Now "assume" that the case doesn't match. Add this state + // to the default state (if it is feasible). + + isFeasible = false; + StNew = Assume(DefaultSt, Res, false, isFeasible); + + if (isFeasible) + DefaultSt = StNew; + + // Concretize the next value in the range. + ++V1; + + } while (V1 < V2); + } + + // If we reach here, than we know that the default branch is + // possible. + builder.generateDefaultCaseNode(DefaultSt); +} + + +void GRExprEngine::VisitLogicalExpr(BinaryOperator* B, NodeTy* Pred, + NodeSet& Dst) { + + assert (B->getOpcode() == BinaryOperator::LAnd || + B->getOpcode() == BinaryOperator::LOr); + + assert (B == CurrentStmt && getCFG().isBlkExpr(B)); + + ValueState* St = GetState(Pred); + RVal X = GetBlkExprRVal(St, B); + + assert (X.isUndef()); + + Expr* Ex = (Expr*) cast<UndefinedVal>(X).getData(); + + assert (Ex); + + if (Ex == B->getRHS()) { + + X = GetBlkExprRVal(St, Ex); + + // Handle undefined values. + + if (X.isUndef()) { + Nodify(Dst, B, Pred, SetBlkExprRVal(St, B, X)); + return; + } + + // We took the RHS. Because the value of the '&&' or '||' expression must + // evaluate to 0 or 1, we must assume the value of the RHS evaluates to 0 + // or 1. Alternatively, we could take a lazy approach, and calculate this + // value later when necessary. We don't have the machinery in place for + // this right now, and since most logical expressions are used for branches, + // the payoff is not likely to be large. Instead, we do eager evaluation. + + bool isFeasible = false; + ValueState* NewState = Assume(St, X, true, isFeasible); + + if (isFeasible) + Nodify(Dst, B, Pred, SetBlkExprRVal(NewState, B, MakeConstantVal(1U, B))); + + isFeasible = false; + NewState = Assume(St, X, false, isFeasible); + + if (isFeasible) + Nodify(Dst, B, Pred, SetBlkExprRVal(NewState, B, MakeConstantVal(0U, B))); + } + else { + // We took the LHS expression. 
Depending on whether we are '&&' or + // '||' we know what the value of the expression is via properties of + // the short-circuiting. + + X = MakeConstantVal( B->getOpcode() == BinaryOperator::LAnd ? 0U : 1U, B); + Nodify(Dst, B, Pred, SetBlkExprRVal(St, B, X)); + } +} + + +void GRExprEngine::ProcessStmt(Stmt* S, StmtNodeBuilder& builder) { + + Builder = &builder; + StmtEntryNode = builder.getLastNode(); + CurrentStmt = S; + NodeSet Dst; + + // Create the cleaned state. + + CleanedState = StateMgr.RemoveDeadBindings(StmtEntryNode->getState(), + CurrentStmt, Liveness); + + Builder->SetCleanedState(CleanedState); + + // Visit the statement. + + Visit(S, StmtEntryNode, Dst); + + // If no nodes were generated, generate a new node that has all the + // dead mappings removed. + + if (Dst.size() == 1 && *Dst.begin() == StmtEntryNode) + builder.generateNode(S, GetState(StmtEntryNode), StmtEntryNode); + + // NULL out these variables to cleanup. + + CurrentStmt = NULL; + StmtEntryNode = NULL; + Builder = NULL; + CleanedState = NULL; +} + +void GRExprEngine::VisitDeclRefExpr(DeclRefExpr* D, NodeTy* Pred, NodeSet& Dst){ + + if (D != CurrentStmt) { + Dst.Add(Pred); // No-op. Simply propagate the current state unchanged. + return; + } + + // If we are here, we are loading the value of the decl and binding + // it to the block-level expression. + + ValueState* St = GetState(Pred); + RVal X = RVal::MakeVal(BasicVals, D); + RVal Y = isa<lval::DeclVal>(X) ? GetRVal(St, cast<lval::DeclVal>(X)) : X; + Nodify(Dst, D, Pred, SetBlkExprRVal(St, D, Y)); +} + +void GRExprEngine::VisitCall(CallExpr* CE, NodeTy* Pred, + CallExpr::arg_iterator AI, + CallExpr::arg_iterator AE, + NodeSet& Dst) { + + // Process the arguments. + + if (AI != AE) { + + NodeSet DstTmp; + Visit(*AI, Pred, DstTmp); + ++AI; + + for (NodeSet::iterator DI=DstTmp.begin(), DE=DstTmp.end(); DI != DE; ++DI) + VisitCall(CE, *DI, AI, AE, Dst); + + return; + } + + // If we reach here we have processed all of the arguments. 
Evaluate + // the callee expression. + + NodeSet DstTmp; + Expr* Callee = CE->getCallee()->IgnoreParenCasts(); + + VisitLVal(Callee, Pred, DstTmp); + + if (DstTmp.empty()) + DstTmp.Add(Pred); + + // Finally, evaluate the function call. + for (NodeSet::iterator DI = DstTmp.begin(), DE = DstTmp.end(); DI!=DE; ++DI) { + + ValueState* St = GetState(*DI); + RVal L = GetLVal(St, Callee); + + // FIXME: Add support for symbolic function calls (calls involving + // function pointer values that are symbolic). + + // Check for undefined control-flow or calls to NULL. + + if (L.isUndef() || isa<lval::ConcreteInt>(L)) { + NodeTy* N = Builder->generateNode(CE, St, *DI); + + if (N) { + N->markAsSink(); + BadCalls.insert(N); + } + + continue; + } + + // Check for the "noreturn" attribute. + + SaveAndRestore<bool> OldSink(Builder->BuildSinks); + + if (isa<lval::FuncVal>(L)) { + + FunctionDecl* FD = cast<lval::FuncVal>(L).getDecl(); + + if (FD->getAttr<NoReturnAttr>()) + Builder->BuildSinks = true; + else { + // HACK: Some functions are not marked noreturn, and don't return. + // Here are a few hardwired ones. If this takes too long, we can + // potentially cache these results. + const char* s = FD->getIdentifier()->getName(); + unsigned n = strlen(s); + + switch (n) { + default: + break; + + case 4: + if (!memcmp(s, "exit", 4)) Builder->BuildSinks = true; + break; + + case 5: + if (!memcmp(s, "panic", 5)) Builder->BuildSinks = true; + break; + } + } + } + + // Evaluate the call. + + + bool invalidateArgs = false; + + if (L.isUnknown()) { + // Check for an "unknown" callee. + invalidateArgs = true; + } + else if (isa<lval::FuncVal>(L)) { + + IdentifierInfo* Info = cast<lval::FuncVal>(L).getDecl()->getIdentifier(); + + if (unsigned id = Info->getBuiltinID()) { + switch (id) { + case Builtin::BI__builtin_expect: { + // For __builtin_expect, just return the value of the subexpression. 
+ assert (CE->arg_begin() != CE->arg_end()); + RVal X = GetRVal(St, *(CE->arg_begin())); + Nodify(Dst, CE, *DI, SetRVal(St, CE, X)); + continue; + } + + default: + invalidateArgs = true; + break; + } + } + } + + if (invalidateArgs) { + // Invalidate all arguments passed in by reference (LVals). + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + RVal V = GetRVal(St, *I); + + if (isa<LVal>(V)) + St = SetRVal(St, cast<LVal>(V), UnknownVal()); + } + + Nodify(Dst, CE, *DI, St); + } + else { + + // Check any arguments passed-by-value against being undefined. + + bool badArg = false; + + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + + if (GetRVal(GetState(*DI), *I).isUndef()) { + NodeTy* N = Builder->generateNode(CE, GetState(*DI), *DI); + + if (N) { + N->markAsSink(); + UndefArgs[N] = *I; + } + + badArg = true; + break; + } + } + + if (badArg) + continue; + + // Dispatch to the plug-in transfer function. + + unsigned size = Dst.size(); + + EvalCall(Dst, CE, cast<LVal>(L), *DI); + + if (!Builder->BuildSinks && Dst.size() == size) + Nodify(Dst, CE, *DI, St); + } + } +} + +void GRExprEngine::VisitCast(Expr* CastE, Expr* Ex, NodeTy* Pred, NodeSet& Dst){ + + NodeSet S1; + QualType T = CastE->getType(); + + if (T->isReferenceType()) + VisitLVal(Ex, Pred, S1); + else + Visit(Ex, Pred, S1); + + // Check for redundant casts or casting to "void" + if (T->isVoidType() || + Ex->getType() == T || + (T->isPointerType() && Ex->getType()->isFunctionType())) { + + for (NodeSet::iterator I1 = S1.begin(), E1 = S1.end(); I1 != E1; ++I1) + Dst.Add(*I1); + + return; + } + + for (NodeSet::iterator I1 = S1.begin(), E1 = S1.end(); I1 != E1; ++I1) { + NodeTy* N = *I1; + ValueState* St = GetState(N); + + RVal V = T->isReferenceType() ? 
GetLVal(St, Ex) : GetRVal(St, Ex); + + Nodify(Dst, CastE, N, SetRVal(St, CastE, EvalCast(V, CastE->getType()))); + } +} + +void GRExprEngine::VisitDeclStmt(DeclStmt* DS, GRExprEngine::NodeTy* Pred, + GRExprEngine::NodeSet& Dst) { + + ValueState* St = GetState(Pred); + + for (const ScopedDecl* D = DS->getDecl(); D; D = D->getNextDeclarator()) + if (const VarDecl* VD = dyn_cast<VarDecl>(D)) { + + // FIXME: Add support for local arrays. + if (VD->getType()->isArrayType()) + continue; + + const Expr* Ex = VD->getInit(); + + if (!VD->hasGlobalStorage() || VD->getStorageClass() == VarDecl::Static) { + + // In this context, Static => Local variable. + + assert (!VD->getStorageClass() == VarDecl::Static || + !isa<FileVarDecl>(VD)); + + // If there is no initializer, set the value of the + // variable to "Undefined". + // + // FIXME: static variables may have an initializer, but the second + // time a function is called those values may not be current. + + QualType T = VD->getType(); + + if ( VD->getStorageClass() == VarDecl::Static) { + + // C99: 6.7.8 Initialization + // If an object that has static storage duration is not initialized + // explicitly, then: + // —if it has pointer type, it is initialized to a null pointer; + // —if it has arithmetic type, it is initialized to (positive or + // unsigned) zero; + + // FIXME: Handle structs. Now we treat their values as unknown. + + if (T->isPointerType()) { + + St = SetRVal(St, lval::DeclVal(VD), + lval::ConcreteInt(BasicVals.getValue(0, T))); + } + else if (T->isIntegerType()) { + + St = SetRVal(St, lval::DeclVal(VD), + nonlval::ConcreteInt(BasicVals.getValue(0, T))); + } + + + } + else { + + // FIXME: Handle structs. Now we treat them as unknown. What + // we need to do is treat their members as unknown. + + if (T->isPointerType() || T->isIntegerType()) + St = SetRVal(St, lval::DeclVal(VD), + Ex ? 
GetRVal(St, Ex) : UndefinedVal()); + } + } + } + + Nodify(Dst, DS, Pred, St); +} + + +void GRExprEngine::VisitGuardedExpr(Expr* Ex, Expr* L, Expr* R, + NodeTy* Pred, NodeSet& Dst) { + + assert (Ex == CurrentStmt && getCFG().isBlkExpr(Ex)); + + ValueState* St = GetState(Pred); + RVal X = GetBlkExprRVal(St, Ex); + + assert (X.isUndef()); + + Expr* SE = (Expr*) cast<UndefinedVal>(X).getData(); + + assert (SE); + + X = GetBlkExprRVal(St, SE); + + // Make sure that we invalidate the previous binding. + Nodify(Dst, Ex, Pred, StateMgr.SetRVal(St, Ex, X, true, true)); +} + +/// VisitSizeOfAlignOfTypeExpr - Transfer function for sizeof(type). +void GRExprEngine::VisitSizeOfAlignOfTypeExpr(SizeOfAlignOfTypeExpr* Ex, + NodeTy* Pred, + NodeSet& Dst) { + + QualType T = Ex->getArgumentType(); + uint64_t amt; + + if (Ex->isSizeOf()) { + + // FIXME: Add support for VLAs. + if (!T.getTypePtr()->isConstantSizeType()) + return; + + amt = 1; // Handle sizeof(void) + + if (T != getContext().VoidTy) + amt = getContext().getTypeSize(T) / 8; + + } + else // Get alignment of the type. + amt = getContext().getTypeAlign(T) / 8; + + Nodify(Dst, Ex, Pred, + SetRVal(GetState(Pred), Ex, + NonLVal::MakeVal(BasicVals, amt, Ex->getType()))); +} + +void GRExprEngine::VisitDeref(UnaryOperator* U, NodeTy* Pred, + NodeSet& Dst, bool GetLVal) { + + Expr* Ex = U->getSubExpr()->IgnoreParens(); + + NodeSet DstTmp; + + if (isa<DeclRefExpr>(Ex)) + DstTmp.Add(Pred); + else + Visit(Ex, Pred, DstTmp); + + for (NodeSet::iterator I = DstTmp.begin(), DE = DstTmp.end(); I != DE; ++I) { + + NodeTy* N = *I; + ValueState* St = GetState(N); + + // FIXME: Bifurcate when dereferencing a symbolic with no constraints? + + RVal V = GetRVal(St, Ex); + + // Check for dereferences of undefined values. + + if (V.isUndef()) { + + NodeTy* Succ = Builder->generateNode(U, St, N); + + if (Succ) { + Succ->markAsSink(); + UndefDeref.insert(Succ); + } + + continue; + } + + // Check for dereferences of unknown values. Treat as No-Ops. 
+ + if (V.isUnknown()) { + Dst.Add(N); + continue; + } + + // After a dereference, one of two possible situations arise: + // (1) A crash, because the pointer was NULL. + // (2) The pointer is not NULL, and the dereference works. + // + // We add these assumptions. + + LVal LV = cast<LVal>(V); + bool isFeasibleNotNull; + + // "Assume" that the pointer is Not-NULL. + + ValueState* StNotNull = Assume(St, LV, true, isFeasibleNotNull); + + if (isFeasibleNotNull) { + + if (GetLVal) Nodify(Dst, U, N, SetRVal(StNotNull, U, LV)); + else { + + // FIXME: Currently symbolic analysis "generates" new symbols + // for the contents of values. We need a better approach. + + Nodify(Dst, U, N, SetRVal(StNotNull, U, + GetRVal(StNotNull, LV, U->getType()))); + } + } + + bool isFeasibleNull; + + // Now "assume" that the pointer is NULL. + + ValueState* StNull = Assume(St, LV, false, isFeasibleNull); + + if (isFeasibleNull) { + + // We don't use "Nodify" here because the node will be a sink + // and we have no intention of processing it later. + + NodeTy* NullNode = Builder->generateNode(U, StNull, N); + + if (NullNode) { + + NullNode->markAsSink(); + + if (isFeasibleNotNull) ImplicitNullDeref.insert(NullNode); + else ExplicitNullDeref.insert(NullNode); + } + } + } +} + +void GRExprEngine::VisitUnaryOperator(UnaryOperator* U, NodeTy* Pred, + NodeSet& Dst) { + + NodeSet S1; + + assert (U->getOpcode() != UnaryOperator::Deref); + assert (U->getOpcode() != UnaryOperator::SizeOf); + assert (U->getOpcode() != UnaryOperator::AlignOf); + + bool use_GetLVal = false; + + switch (U->getOpcode()) { + case UnaryOperator::PostInc: + case UnaryOperator::PostDec: + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + case UnaryOperator::AddrOf: + // Evalue subexpression as an LVal. 
+ use_GetLVal = true; + VisitLVal(U->getSubExpr(), Pred, S1); + break; + + default: + Visit(U->getSubExpr(), Pred, S1); + break; + } + + for (NodeSet::iterator I1 = S1.begin(), E1 = S1.end(); I1 != E1; ++I1) { + + NodeTy* N1 = *I1; + ValueState* St = GetState(N1); + + RVal SubV = use_GetLVal ? GetLVal(St, U->getSubExpr()) : + GetRVal(St, U->getSubExpr()); + + if (SubV.isUnknown()) { + Dst.Add(N1); + continue; + } + + if (SubV.isUndef()) { + Nodify(Dst, U, N1, SetRVal(St, U, SubV)); + continue; + } + + if (U->isIncrementDecrementOp()) { + + // Handle ++ and -- (both pre- and post-increment). + + LVal SubLV = cast<LVal>(SubV); + RVal V = GetRVal(St, SubLV, U->getType()); + + if (V.isUnknown()) { + Dst.Add(N1); + continue; + } + + // Propagate undefined values. + if (V.isUndef()) { + Nodify(Dst, U, N1, SetRVal(St, U, V)); + continue; + } + + // Handle all other values. + + BinaryOperator::Opcode Op = U->isIncrementOp() ? BinaryOperator::Add + : BinaryOperator::Sub; + + RVal Result = EvalBinOp(Op, V, MakeConstantVal(1U, U)); + + if (U->isPostfix()) + St = SetRVal(SetRVal(St, U, V), SubLV, Result); + else + St = SetRVal(SetRVal(St, U, Result), SubLV, Result); + + Nodify(Dst, U, N1, St); + continue; + } + + // Handle all other unary operators. + + switch (U->getOpcode()) { + + case UnaryOperator::Extension: + St = SetRVal(St, U, SubV); + break; + + case UnaryOperator::Minus: + St = SetRVal(St, U, EvalMinus(U, cast<NonLVal>(SubV))); + break; + + case UnaryOperator::Not: + St = SetRVal(St, U, EvalComplement(cast<NonLVal>(SubV))); + break; + + case UnaryOperator::LNot: + + // C99 6.5.3.3: "The expression !E is equivalent to (0==E)." + // + // Note: technically we do "E == 0", but this is the same in the + // transfer functions as "0 == E". 
+ + if (isa<LVal>(SubV)) { + lval::ConcreteInt V(BasicVals.getZeroWithPtrWidth()); + RVal Result = EvalBinOp(BinaryOperator::EQ, cast<LVal>(SubV), V); + St = SetRVal(St, U, Result); + } + else { + Expr* Ex = U->getSubExpr(); + nonlval::ConcreteInt V(BasicVals.getValue(0, Ex->getType())); + RVal Result = EvalBinOp(BinaryOperator::EQ, cast<NonLVal>(SubV), V); + St = SetRVal(St, U, Result); + } + + break; + + case UnaryOperator::AddrOf: { + assert (isa<LVal>(SubV)); + St = SetRVal(St, U, SubV); + break; + } + + default: ; + assert (false && "Not implemented."); + } + + Nodify(Dst, U, N1, St); + } +} + +void GRExprEngine::VisitSizeOfExpr(UnaryOperator* U, NodeTy* Pred, + NodeSet& Dst) { + + QualType T = U->getSubExpr()->getType(); + + // FIXME: Add support for VLAs. + if (!T.getTypePtr()->isConstantSizeType()) + return; + + uint64_t size = getContext().getTypeSize(T) / 8; + ValueState* St = GetState(Pred); + St = SetRVal(St, U, NonLVal::MakeVal(BasicVals, size, U->getType())); + + Nodify(Dst, U, Pred, St); +} + +void GRExprEngine::VisitLVal(Expr* Ex, NodeTy* Pred, NodeSet& Dst) { + + if (Ex != CurrentStmt && getCFG().isBlkExpr(Ex)) { + Dst.Add(Pred); + return; + } + + Ex = Ex->IgnoreParens(); + + if (isa<DeclRefExpr>(Ex)) { + Dst.Add(Pred); + return; + } + + if (UnaryOperator* U = dyn_cast<UnaryOperator>(Ex)) + if (U->getOpcode() == UnaryOperator::Deref) { + VisitDeref(U, Pred, Dst, true); + return; + } + + Visit(Ex, Pred, Dst); +} + +void GRExprEngine::VisitBinaryOperator(BinaryOperator* B, + GRExprEngine::NodeTy* Pred, + GRExprEngine::NodeSet& Dst) { + NodeSet S1; + + if (B->isAssignmentOp()) + VisitLVal(B->getLHS(), Pred, S1); + else + Visit(B->getLHS(), Pred, S1); + + for (NodeSet::iterator I1=S1.begin(), E1=S1.end(); I1 != E1; ++I1) { + + NodeTy* N1 = *I1; + + // When getting the value for the LHS, check if we are in an assignment. 
+ // In such cases, we want to (initially) treat the LHS as an LVal, + // so we use GetLVal instead of GetRVal so that DeclRefExpr's are + // evaluated to LValDecl's instead of to an NonLVal. + + RVal LeftV = B->isAssignmentOp() ? GetLVal(GetState(N1), B->getLHS()) + : GetRVal(GetState(N1), B->getLHS()); + + // Visit the RHS... + + NodeSet S2; + Visit(B->getRHS(), N1, S2); + + // Process the binary operator. + + for (NodeSet::iterator I2 = S2.begin(), E2 = S2.end(); I2 != E2; ++I2) { + + NodeTy* N2 = *I2; + ValueState* St = GetState(N2); + Expr* RHS = B->getRHS(); + RVal RightV = GetRVal(St, RHS); + + BinaryOperator::Opcode Op = B->getOpcode(); + + if ((Op == BinaryOperator::Div || Op == BinaryOperator::Rem) + && RHS->getType()->isIntegerType()) { + + // Check if the denominator is undefined. + + if (!RightV.isUnknown()) { + + if (RightV.isUndef()) { + NodeTy* DivUndef = Builder->generateNode(B, St, N2); + + if (DivUndef) { + DivUndef->markAsSink(); + ExplicitBadDivides.insert(DivUndef); + } + + continue; + } + + // Check for divide/remainder-by-zero. + // + // First, "assume" that the denominator is 0 or undefined. + + bool isFeasibleZero = false; + ValueState* ZeroSt = Assume(St, RightV, false, isFeasibleZero); + + // Second, "assume" that the denominator cannot be 0. + + bool isFeasibleNotZero = false; + St = Assume(St, RightV, true, isFeasibleNotZero); + + // Create the node for the divide-by-zero (if it occurred). + + if (isFeasibleZero) + if (NodeTy* DivZeroNode = Builder->generateNode(B, ZeroSt, N2)) { + DivZeroNode->markAsSink(); + + if (isFeasibleNotZero) + ImplicitBadDivides.insert(DivZeroNode); + else + ExplicitBadDivides.insert(DivZeroNode); + + } + + if (!isFeasibleNotZero) + continue; + } + + // Fall-through. The logic below processes the divide. + } + + + if (Op <= BinaryOperator::Or) { + + // Process non-assignements except commas or short-circuited + // logical expressions (LAnd and LOr). 
+ + RVal Result = EvalBinOp(Op, LeftV, RightV); + + if (Result.isUnknown()) { + Dst.Add(N2); + continue; + } + + if (Result.isUndef() && !LeftV.isUndef() && !RightV.isUndef()) { + + // The operands were not undefined, but the result is undefined. + + if (NodeTy* UndefNode = Builder->generateNode(B, St, N2)) { + UndefNode->markAsSink(); + UndefResults.insert(UndefNode); + } + + continue; + } + + Nodify(Dst, B, N2, SetRVal(St, B, Result)); + continue; + } + + // Process assignments. + + switch (Op) { + + case BinaryOperator::Assign: { + + // Simple assignments. + + if (LeftV.isUndef()) { + HandleUndefinedStore(B, N2); + continue; + } + + // EXPERIMENTAL: "Conjured" symbols. + + if (RightV.isUnknown()) { + unsigned Count = Builder->getCurrentBlockCount(); + SymbolID Sym = SymMgr.getConjuredSymbol(B->getRHS(), Count); + + RightV = B->getRHS()->getType()->isPointerType() + ? cast<RVal>(lval::SymbolVal(Sym)) + : cast<RVal>(nonlval::SymbolVal(Sym)); + } + + // Even if the LHS evaluates to an unknown L-Value, the entire + // expression still evaluates to the RHS. + + if (LeftV.isUnknown()) { + St = SetRVal(St, B, RightV); + break; + } + + // Simulate the effects of a "store": bind the value of the RHS + // to the L-Value represented by the LHS. + + St = SetRVal(SetRVal(St, B, RightV), cast<LVal>(LeftV), RightV); + break; + } + + // Compound assignment operators. + + default: { + + assert (B->isCompoundAssignmentOp()); + + if (Op >= BinaryOperator::AndAssign) + ((int&) Op) -= (BinaryOperator::AndAssign - BinaryOperator::And); + else + ((int&) Op) -= BinaryOperator::MulAssign; + + // Check if the LHS is undefined. + + if (LeftV.isUndef()) { + HandleUndefinedStore(B, N2); + continue; + } + + if (LeftV.isUnknown()) { + assert (isa<UnknownVal>(GetRVal(St, B))); + Dst.Add(N2); + continue; + } + + // At this pointer we know that the LHS evaluates to an LVal + // that is neither "Unknown" or "Undefined." 
+ + LVal LeftLV = cast<LVal>(LeftV); + + // Fetch the value of the LHS (the value of the variable, etc.). + + RVal V = GetRVal(GetState(N1), LeftLV, B->getLHS()->getType()); + + // Propagate undefined value (left-side). We + // propogate undefined values for the RHS below when + // we also check for divide-by-zero. + + if (V.isUndef()) { + St = SetRVal(St, B, V); + break; + } + + // Propagate unknown values. + + if (V.isUnknown()) { + // The value bound to LeftV is unknown. Thus we just + // propagate the current node (as "B" is already bound to nothing). + assert (isa<UnknownVal>(GetRVal(St, B))); + Dst.Add(N2); + continue; + } + + if (RightV.isUnknown()) { + assert (isa<UnknownVal>(GetRVal(St, B))); + St = SetRVal(St, LeftLV, UnknownVal()); + break; + } + + // At this point: + // + // The LHS is not Undef/Unknown. + // The RHS is not Unknown. + + // Get the computation type. + QualType CTy = cast<CompoundAssignOperator>(B)->getComputationType(); + + // Perform promotions. + V = EvalCast(V, CTy); + RightV = EvalCast(RightV, CTy); + + // Evaluate operands and promote to result type. + + if ((Op == BinaryOperator::Div || Op == BinaryOperator::Rem) + && RHS->getType()->isIntegerType()) { + + // Check if the denominator is undefined. + + if (RightV.isUndef()) { + NodeTy* DivUndef = Builder->generateNode(B, St, N2); + + if (DivUndef) { + DivUndef->markAsSink(); + ExplicitBadDivides.insert(DivUndef); + } + + continue; + } + + // First, "assume" that the denominator is 0. + + bool isFeasibleZero = false; + ValueState* ZeroSt = Assume(St, RightV, false, isFeasibleZero); + + // Second, "assume" that the denominator cannot be 0. + + bool isFeasibleNotZero = false; + St = Assume(St, RightV, true, isFeasibleNotZero); + + // Create the node for the divide-by-zero error (if it occurred). 
+ + if (isFeasibleZero) { + NodeTy* DivZeroNode = Builder->generateNode(B, ZeroSt, N2); + + if (DivZeroNode) { + DivZeroNode->markAsSink(); + + if (isFeasibleNotZero) + ImplicitBadDivides.insert(DivZeroNode); + else + ExplicitBadDivides.insert(DivZeroNode); + } + } + + if (!isFeasibleNotZero) + continue; + + // Fall-through. The logic below processes the divide. + } + else { + + // Propagate undefined values (right-side). + + if (RightV.isUndef()) { + St = SetRVal(SetRVal(St, B, RightV), LeftLV, RightV); + break; + } + + } + + RVal Result = EvalCast(EvalBinOp(Op, V, RightV), B->getType()); + + if (Result.isUndef()) { + + // The operands were not undefined, but the result is undefined. + + if (NodeTy* UndefNode = Builder->generateNode(B, St, N2)) { + UndefNode->markAsSink(); + UndefResults.insert(UndefNode); + } + + continue; + } + + St = SetRVal(SetRVal(St, B, Result), LeftLV, Result); + } + } + + Nodify(Dst, B, N2, St); + } + } +} + +void GRExprEngine::HandleUndefinedStore(Stmt* S, NodeTy* Pred) { + NodeTy* N = Builder->generateNode(S, GetState(Pred), Pred); + N->markAsSink(); + UndefStores.insert(N); +} + +void GRExprEngine::Visit(Stmt* S, NodeTy* Pred, NodeSet& Dst) { + + // FIXME: add metadata to the CFG so that we can disable + // this check when we KNOW that there is no block-level subexpression. + // The motivation is that this check requires a hashtable lookup. + + if (S != CurrentStmt && getCFG().isBlkExpr(S)) { + Dst.Add(Pred); + return; + } + + switch (S->getStmtClass()) { + + default: + // Cases we intentionally have "default" handle: + // AddrLabelExpr, IntegerLiteral, CharacterLiteral + + Dst.Add(Pred); // No-op. Simply propagate the current state unchanged. 
+ break; + + case Stmt::BinaryOperatorClass: { + BinaryOperator* B = cast<BinaryOperator>(S); + + if (B->isLogicalOp()) { + VisitLogicalExpr(B, Pred, Dst); + break; + } + else if (B->getOpcode() == BinaryOperator::Comma) { + ValueState* St = GetState(Pred); + Nodify(Dst, B, Pred, SetRVal(St, B, GetRVal(St, B->getRHS()))); + break; + } + + VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst); + break; + } + + case Stmt::CallExprClass: { + CallExpr* C = cast<CallExpr>(S); + VisitCall(C, Pred, C->arg_begin(), C->arg_end(), Dst); + break; + } + + case Stmt::CastExprClass: { + CastExpr* C = cast<CastExpr>(S); + VisitCast(C, C->getSubExpr(), Pred, Dst); + break; + } + + // FIXME: ChooseExpr is really a constant. We need to fix + // the CFG do not model them as explicit control-flow. + + case Stmt::ChooseExprClass: { // __builtin_choose_expr + ChooseExpr* C = cast<ChooseExpr>(S); + VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst); + break; + } + + case Stmt::CompoundAssignOperatorClass: + VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst); + break; + + case Stmt::ConditionalOperatorClass: { // '?' operator + ConditionalOperator* C = cast<ConditionalOperator>(S); + VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst); + break; + } + + case Stmt::DeclRefExprClass: + VisitDeclRefExpr(cast<DeclRefExpr>(S), Pred, Dst); + break; + + case Stmt::DeclStmtClass: + VisitDeclStmt(cast<DeclStmt>(S), Pred, Dst); + break; + + case Stmt::ImplicitCastExprClass: { + ImplicitCastExpr* C = cast<ImplicitCastExpr>(S); + VisitCast(C, C->getSubExpr(), Pred, Dst); + break; + } + + case Stmt::ParenExprClass: + Visit(cast<ParenExpr>(S)->getSubExpr(), Pred, Dst); + break; + + case Stmt::SizeOfAlignOfTypeExprClass: + VisitSizeOfAlignOfTypeExpr(cast<SizeOfAlignOfTypeExpr>(S), Pred, Dst); + break; + + case Stmt::StmtExprClass: { + StmtExpr* SE = cast<StmtExpr>(S); + + ValueState* St = GetState(Pred); + + // FIXME: Not certain if we can have empty StmtExprs. 
If so, we should + // probably just remove these from the CFG. + assert (!SE->getSubStmt()->body_empty()); + + if (Expr* LastExpr = dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) + Nodify(Dst, SE, Pred, SetRVal(St, SE, GetRVal(St, LastExpr))); + else + Dst.Add(Pred); + + break; + } + + // FIXME: We may wish to always bind state to ReturnStmts so + // that users can quickly query what was the state at the + // exit points of a function. + + case Stmt::ReturnStmtClass: { + if (Expr* R = cast<ReturnStmt>(S)->getRetValue()) + Visit(R, Pred, Dst); + else + Dst.Add(Pred); + + break; + } + + case Stmt::UnaryOperatorClass: { + UnaryOperator* U = cast<UnaryOperator>(S); + + switch (U->getOpcode()) { + case UnaryOperator::Deref: VisitDeref(U, Pred, Dst); break; + case UnaryOperator::Plus: Visit(U->getSubExpr(), Pred, Dst); break; + case UnaryOperator::SizeOf: VisitSizeOfExpr(U, Pred, Dst); break; + default: VisitUnaryOperator(U, Pred, Dst); break; + } + + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Assume" logic. +//===----------------------------------------------------------------------===// + +ValueState* GRExprEngine::Assume(ValueState* St, LVal Cond, + bool Assumption, + bool& isFeasible) { + switch (Cond.getSubKind()) { + default: + assert (false && "'Assume' not implemented for this LVal."); + return St; + + case lval::SymbolValKind: + if (Assumption) + return AssumeSymNE(St, cast<lval::SymbolVal>(Cond).getSymbol(), + BasicVals.getZeroWithPtrWidth(), isFeasible); + else + return AssumeSymEQ(St, cast<lval::SymbolVal>(Cond).getSymbol(), + BasicVals.getZeroWithPtrWidth(), isFeasible); + + + case lval::DeclValKind: + case lval::FuncValKind: + case lval::GotoLabelKind: + isFeasible = Assumption; + return St; + + case lval::ConcreteIntKind: { + bool b = cast<lval::ConcreteInt>(Cond).getValue() != 0; + isFeasible = b ? 
Assumption : !Assumption; + return St; + } + } +} + +ValueState* GRExprEngine::Assume(ValueState* St, NonLVal Cond, + bool Assumption, + bool& isFeasible) { + switch (Cond.getSubKind()) { + default: + assert (false && "'Assume' not implemented for this NonLVal."); + return St; + + + case nonlval::SymbolValKind: { + nonlval::SymbolVal& SV = cast<nonlval::SymbolVal>(Cond); + SymbolID sym = SV.getSymbol(); + + if (Assumption) + return AssumeSymNE(St, sym, BasicVals.getValue(0, SymMgr.getType(sym)), + isFeasible); + else + return AssumeSymEQ(St, sym, BasicVals.getValue(0, SymMgr.getType(sym)), + isFeasible); + } + + case nonlval::SymIntConstraintValKind: + return + AssumeSymInt(St, Assumption, + cast<nonlval::SymIntConstraintVal>(Cond).getConstraint(), + isFeasible); + + case nonlval::ConcreteIntKind: { + bool b = cast<nonlval::ConcreteInt>(Cond).getValue() != 0; + isFeasible = b ? Assumption : !Assumption; + return St; + } + } +} + +ValueState* +GRExprEngine::AssumeSymNE(ValueState* St, SymbolID sym, + const llvm::APSInt& V, bool& isFeasible) { + + // First, determine if sym == X, where X != V. + if (const llvm::APSInt* X = St->getSymVal(sym)) { + isFeasible = *X != V; + return St; + } + + // Second, determine if sym != V. + if (St->isNotEqual(sym, V)) { + isFeasible = true; + return St; + } + + // If we reach here, sym is not a constant and we don't know if it is != V. + // Make that assumption. + + isFeasible = true; + return StateMgr.AddNE(St, sym, V); +} + +ValueState* +GRExprEngine::AssumeSymEQ(ValueState* St, SymbolID sym, + const llvm::APSInt& V, bool& isFeasible) { + + // First, determine if sym == X, where X != V. + if (const llvm::APSInt* X = St->getSymVal(sym)) { + isFeasible = *X == V; + return St; + } + + // Second, determine if sym != V. + if (St->isNotEqual(sym, V)) { + isFeasible = false; + return St; + } + + // If we reach here, sym is not a constant and we don't know if it is == V. + // Make that assumption. 
+ + isFeasible = true; + return StateMgr.AddEQ(St, sym, V); +} + +ValueState* +GRExprEngine::AssumeSymInt(ValueState* St, bool Assumption, + const SymIntConstraint& C, bool& isFeasible) { + + switch (C.getOpcode()) { + default: + // No logic yet for other operators. + isFeasible = true; + return St; + + case BinaryOperator::EQ: + if (Assumption) + return AssumeSymEQ(St, C.getSymbol(), C.getInt(), isFeasible); + else + return AssumeSymNE(St, C.getSymbol(), C.getInt(), isFeasible); + + case BinaryOperator::NE: + if (Assumption) + return AssumeSymNE(St, C.getSymbol(), C.getInt(), isFeasible); + else + return AssumeSymEQ(St, C.getSymbol(), C.getInt(), isFeasible); + } +} + +//===----------------------------------------------------------------------===// +// Visualization. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +static GRExprEngine* GraphPrintCheckerState; +static SourceManager* GraphPrintSourceManager; +static ValueState::CheckerStatePrinter* GraphCheckerStatePrinter; + +namespace llvm { +template<> +struct VISIBILITY_HIDDEN DOTGraphTraits<GRExprEngine::NodeTy*> : + public DefaultDOTGraphTraits { + + static void PrintVarBindings(std::ostream& Out, ValueState* St) { + + Out << "Variables:\\l"; + + bool isFirst = true; + + for (ValueState::vb_iterator I=St->vb_begin(), E=St->vb_end(); I!=E;++I) { + + if (isFirst) + isFirst = false; + else + Out << "\\l"; + + Out << ' ' << I.getKey()->getName() << " : "; + I.getData().print(Out); + } + + } + + + static void PrintSubExprBindings(std::ostream& Out, ValueState* St){ + + bool isFirst = true; + + for (ValueState::seb_iterator I=St->seb_begin(), E=St->seb_end();I!=E;++I) { + + if (isFirst) { + Out << "\\l\\lSub-Expressions:\\l"; + isFirst = false; + } + else + Out << "\\l"; + + Out << " (" << (void*) I.getKey() << ") "; + I.getKey()->printPretty(Out); + Out << " : "; + I.getData().print(Out); + } + } + + static void PrintBlkExprBindings(std::ostream& Out, ValueState* 
St){ + + bool isFirst = true; + + for (ValueState::beb_iterator I=St->beb_begin(), E=St->beb_end(); I!=E;++I){ + if (isFirst) { + Out << "\\l\\lBlock-level Expressions:\\l"; + isFirst = false; + } + else + Out << "\\l"; + + Out << " (" << (void*) I.getKey() << ") "; + I.getKey()->printPretty(Out); + Out << " : "; + I.getData().print(Out); + } + } + + static void PrintEQ(std::ostream& Out, ValueState* St) { + ValueState::ConstEqTy CE = St->ConstEq; + + if (CE.isEmpty()) + return; + + Out << "\\l\\|'==' constraints:"; + + for (ValueState::ConstEqTy::iterator I=CE.begin(), E=CE.end(); I!=E;++I) + Out << "\\l $" << I.getKey() << " : " << I.getData()->toString(); + } + + static void PrintNE(std::ostream& Out, ValueState* St) { + ValueState::ConstNotEqTy NE = St->ConstNotEq; + + if (NE.isEmpty()) + return; + + Out << "\\l\\|'!=' constraints:"; + + for (ValueState::ConstNotEqTy::iterator I=NE.begin(), EI=NE.end(); + I != EI; ++I){ + + Out << "\\l $" << I.getKey() << " : "; + bool isFirst = true; + + ValueState::IntSetTy::iterator J=I.getData().begin(), + EJ=I.getData().end(); + for ( ; J != EJ; ++J) { + if (isFirst) isFirst = false; + else Out << ", "; + + Out << (*J)->toString(); + } + } + } + + static std::string getNodeAttributes(const GRExprEngine::NodeTy* N, void*) { + + if (GraphPrintCheckerState->isImplicitNullDeref(N) || + GraphPrintCheckerState->isExplicitNullDeref(N) || + GraphPrintCheckerState->isUndefDeref(N) || + GraphPrintCheckerState->isUndefStore(N) || + GraphPrintCheckerState->isUndefControlFlow(N) || + GraphPrintCheckerState->isExplicitBadDivide(N) || + GraphPrintCheckerState->isImplicitBadDivide(N) || + GraphPrintCheckerState->isUndefResult(N) || + GraphPrintCheckerState->isBadCall(N) || + GraphPrintCheckerState->isUndefArg(N)) + return "color=\"red\",style=\"filled\""; + + if (GraphPrintCheckerState->isNoReturnCall(N)) + return "color=\"blue\",style=\"filled\""; + + return ""; + } + + static std::string getNodeLabel(const GRExprEngine::NodeTy* N, 
void*) { + std::ostringstream Out; + + // Program Location. + ProgramPoint Loc = N->getLocation(); + + switch (Loc.getKind()) { + case ProgramPoint::BlockEntranceKind: + Out << "Block Entrance: B" + << cast<BlockEntrance>(Loc).getBlock()->getBlockID(); + break; + + case ProgramPoint::BlockExitKind: + assert (false); + break; + + case ProgramPoint::PostStmtKind: { + const PostStmt& L = cast<PostStmt>(Loc); + Stmt* S = L.getStmt(); + SourceLocation SLoc = S->getLocStart(); + + Out << S->getStmtClassName() << ' ' << (void*) S << ' '; + S->printPretty(Out); + + if (SLoc.isFileID()) { + Out << "\\lline=" + << GraphPrintSourceManager->getLineNumber(SLoc) << " col=" + << GraphPrintSourceManager->getColumnNumber(SLoc) << "\\l"; + } + + if (GraphPrintCheckerState->isImplicitNullDeref(N)) + Out << "\\|Implicit-Null Dereference.\\l"; + else if (GraphPrintCheckerState->isExplicitNullDeref(N)) + Out << "\\|Explicit-Null Dereference.\\l"; + else if (GraphPrintCheckerState->isUndefDeref(N)) + Out << "\\|Dereference of undefialied value.\\l"; + else if (GraphPrintCheckerState->isUndefStore(N)) + Out << "\\|Store to Undefined LVal."; + else if (GraphPrintCheckerState->isExplicitBadDivide(N)) + Out << "\\|Explicit divide-by zero or undefined value."; + else if (GraphPrintCheckerState->isImplicitBadDivide(N)) + Out << "\\|Implicit divide-by zero or undefined value."; + else if (GraphPrintCheckerState->isUndefResult(N)) + Out << "\\|Result of operation is undefined."; + else if (GraphPrintCheckerState->isNoReturnCall(N)) + Out << "\\|Call to function marked \"noreturn\"."; + else if (GraphPrintCheckerState->isBadCall(N)) + Out << "\\|Call to NULL/Undefined."; + else if (GraphPrintCheckerState->isUndefArg(N)) + Out << "\\|Argument in call is undefined"; + + break; + } + + default: { + const BlockEdge& E = cast<BlockEdge>(Loc); + Out << "Edge: (B" << E.getSrc()->getBlockID() << ", B" + << E.getDst()->getBlockID() << ')'; + + if (Stmt* T = E.getSrc()->getTerminator()) { + + 
SourceLocation SLoc = T->getLocStart(); + + Out << "\\|Terminator: "; + + E.getSrc()->printTerminator(Out); + + if (SLoc.isFileID()) { + Out << "\\lline=" + << GraphPrintSourceManager->getLineNumber(SLoc) << " col=" + << GraphPrintSourceManager->getColumnNumber(SLoc); + } + + if (isa<SwitchStmt>(T)) { + Stmt* Label = E.getDst()->getLabel(); + + if (Label) { + if (CaseStmt* C = dyn_cast<CaseStmt>(Label)) { + Out << "\\lcase "; + C->getLHS()->printPretty(Out); + + if (Stmt* RHS = C->getRHS()) { + Out << " .. "; + RHS->printPretty(Out); + } + + Out << ":"; + } + else { + assert (isa<DefaultStmt>(Label)); + Out << "\\ldefault:"; + } + } + else + Out << "\\l(implicit) default:"; + } + else if (isa<IndirectGotoStmt>(T)) { + // FIXME + } + else { + Out << "\\lCondition: "; + if (*E.getSrc()->succ_begin() == E.getDst()) + Out << "true"; + else + Out << "false"; + } + + Out << "\\l"; + } + + if (GraphPrintCheckerState->isUndefControlFlow(N)) { + Out << "\\|Control-flow based on\\lUndefined value.\\l"; + } + } + } + + Out << "\\|StateID: " << (void*) N->getState() << "\\|"; + + N->getState()->printDOT(Out, GraphCheckerStatePrinter); + + Out << "\\l"; + return Out.str(); + } +}; +} // end llvm namespace +#endif + +#ifndef NDEBUG + +template <typename ITERATOR> +GRExprEngine::NodeTy* GetGraphNode(ITERATOR I) { return *I; } + +template <> +GRExprEngine::NodeTy* +GetGraphNode<llvm::DenseMap<GRExprEngine::NodeTy*, Expr*>::iterator> + (llvm::DenseMap<GRExprEngine::NodeTy*, Expr*>::iterator I) { + return I->first; +} + +template <typename ITERATOR> +static void AddSources(llvm::SmallVector<GRExprEngine::NodeTy*, 10>& Sources, + ITERATOR I, ITERATOR E) { + + llvm::SmallPtrSet<void*,10> CachedSources; + + for ( ; I != E; ++I ) { + GRExprEngine::NodeTy* N = GetGraphNode(I); + void* p = N->getLocation().getRawData(); + + if (CachedSources.count(p)) + continue; + + CachedSources.insert(p); + + Sources.push_back(N); + } +} +#endif + +void GRExprEngine::ViewGraph(bool trim) { +#ifndef 
NDEBUG + if (trim) { + llvm::SmallVector<NodeTy*, 10> Src; + AddSources(Src, null_derefs_begin(), null_derefs_end()); + AddSources(Src, undef_derefs_begin(), undef_derefs_end()); + AddSources(Src, explicit_bad_divides_begin(), explicit_bad_divides_end()); + AddSources(Src, undef_results_begin(), undef_results_end()); + AddSources(Src, bad_calls_begin(), bad_calls_end()); + AddSources(Src, undef_arg_begin(), undef_arg_end()); + AddSources(Src, undef_branches_begin(), undef_branches_end()); + + ViewGraph(&Src[0], &Src[0]+Src.size()); + } + else { + GraphPrintCheckerState = this; + GraphPrintSourceManager = &getContext().getSourceManager(); + GraphCheckerStatePrinter = TF->getCheckerStatePrinter(); + + llvm::ViewGraph(*G.roots_begin(), "GRExprEngine"); + + GraphPrintCheckerState = NULL; + GraphPrintSourceManager = NULL; + GraphCheckerStatePrinter = NULL; + } +#endif +} + +void GRExprEngine::ViewGraph(NodeTy** Beg, NodeTy** End) { +#ifndef NDEBUG + GraphPrintCheckerState = this; + GraphPrintSourceManager = &getContext().getSourceManager(); + GraphCheckerStatePrinter = TF->getCheckerStatePrinter(); + + GRExprEngine::GraphTy* TrimmedG = G.Trim(Beg, End); + + if (!TrimmedG) + llvm::cerr << "warning: Trimmed ExplodedGraph is empty.\n"; + else { + llvm::ViewGraph(*TrimmedG->roots_begin(), "TrimmedGRExprEngine"); + delete TrimmedG; + } + + GraphPrintCheckerState = NULL; + GraphPrintSourceManager = NULL; + GraphCheckerStatePrinter = NULL; +#endif +} diff --git a/clang/lib/Analysis/GRSimpleVals.cpp b/clang/lib/Analysis/GRSimpleVals.cpp new file mode 100644 index 00000000000..3777d53bf0b --- /dev/null +++ b/clang/lib/Analysis/GRSimpleVals.cpp @@ -0,0 +1,462 @@ +// GRSimpleVals.cpp - Transfer functions for tracking simple values -*- C++ -*-- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines GRSimpleVals, a sub-class of GRTransferFuncs that +// provides transfer functions for performing simple value tracking with +// limited support for symbolics. +// +//===----------------------------------------------------------------------===// + +#include "GRSimpleVals.h" +#include "clang/Analysis/PathSensitive/ValueState.h" +#include "clang/Basic/Diagnostic.h" +#include <sstream> + +using namespace clang; + +namespace clang { + +template <typename ITERATOR> +static inline ProgramPoint GetLocation(ITERATOR I) { + return (*I)->getLocation(); +} + +template <> +inline ProgramPoint GetLocation(GRExprEngine::undef_arg_iterator I) { + return I->first->getLocation(); +} + +static inline Stmt* GetStmt(const ProgramPoint& P) { + if (const PostStmt* PS = dyn_cast<PostStmt>(&P)) { + return PS->getStmt(); + } + else if (const BlockEdge* BE = dyn_cast<BlockEdge>(&P)) { + return BE->getSrc()->getTerminator(); + } + + assert (false && "Unsupported ProgramPoint."); + return NULL; +} + +template <typename ITERATOR> +static void EmitDiag(Diagnostic& Diag, SourceManager& SrcMgr, + unsigned ErrorDiag, ITERATOR I) { + + Stmt* S = GetStmt(GetLocation(I)); + Diag.Report(FullSourceLoc(S->getLocStart(), SrcMgr), ErrorDiag); +} + + +template <> +static void EmitDiag(Diagnostic& Diag, SourceManager& SrcMgr, + unsigned ErrorDiag, GRExprEngine::undef_arg_iterator I) { + + Stmt* S1 = GetStmt(GetLocation(I)); + Expr* E2 = cast<Expr>(I->second); + + SourceLocation Loc = S1->getLocStart(); + SourceRange R = E2->getSourceRange(); + Diag.Report(FullSourceLoc(Loc, SrcMgr), ErrorDiag, 0, 0, &R, 1); +} + +template <typename ITERATOR> +void EmitWarning(Diagnostic& Diag, SourceManager& SrcMgr, + ITERATOR I, ITERATOR E, const char* msg) { + + std::ostringstream Out; + Out << "[CHECKER] " << msg; + msg = Out.str().c_str(); + + bool isFirst = true; + unsigned ErrorDiag = 0; + 
llvm::SmallPtrSet<void*,10> CachedErrors; + + for (; I != E; ++I) { + + if (isFirst) { + isFirst = false; + ErrorDiag = Diag.getCustomDiagID(Diagnostic::Warning, msg); + } + else { + + // HACK: Cache the location of the error. Don't emit the same + // warning for the same error type that occurs at the same program + // location but along a different path. + void* p = GetLocation(I).getRawData(); + + if (CachedErrors.count(p)) + continue; + + CachedErrors.insert(p); + } + + EmitDiag(Diag, SrcMgr, ErrorDiag, I); + } +} + +unsigned RunGRSimpleVals(CFG& cfg, Decl& CD, ASTContext& Ctx, + Diagnostic& Diag, bool Visualize, bool TrimGraph) { + + if (Diag.hasErrorOccurred()) + return 0; + + GRCoreEngine<GRExprEngine> Eng(cfg, CD, Ctx); + GRExprEngine* CheckerState = &Eng.getCheckerState(); + GRSimpleVals GRSV; + CheckerState->setTransferFunctions(GRSV); + + // Execute the worklist algorithm. + Eng.ExecuteWorkList(100000); + + SourceManager& SrcMgr = Ctx.getSourceManager(); + + EmitWarning(Diag, SrcMgr, + CheckerState->null_derefs_begin(), + CheckerState->null_derefs_end(), + "NULL pointer is dereferenced after it is checked for NULL."); + + EmitWarning(Diag, SrcMgr, + CheckerState->undef_derefs_begin(), + CheckerState->undef_derefs_end(), + "Dereference of undefined value."); + + EmitWarning(Diag, SrcMgr, + CheckerState->undef_derefs_begin(), + CheckerState->undef_derefs_end(), + "Dereference of undefined value."); + + EmitWarning(Diag, SrcMgr, + CheckerState->explicit_bad_divides_begin(), + CheckerState->explicit_bad_divides_end(), + "Division by zero/undefined value."); + + EmitWarning(Diag, SrcMgr, + CheckerState->undef_results_begin(), + CheckerState->undef_results_end(), + "Result of operation is undefined."); + + EmitWarning(Diag, SrcMgr, + CheckerState->bad_calls_begin(), + CheckerState->bad_calls_end(), + "Call using a NULL or undefined function pointer value."); + + EmitWarning(Diag, SrcMgr, + CheckerState->undef_arg_begin(), + CheckerState->undef_arg_end(), + 
"Pass-by-value argument in function or message expression is undefined."); + + EmitWarning(Diag, SrcMgr, + CheckerState->undef_branches_begin(), + CheckerState->undef_branches_end(), + "Branch condition evaluates to an uninitialized value."); + +#ifndef NDEBUG + if (Visualize) CheckerState->ViewGraph(TrimGraph); +#endif + + return Eng.getGraph().size(); +} + +} // end clang namespace + +//===----------------------------------------------------------------------===// +// Transfer function for Casts. +//===----------------------------------------------------------------------===// + +RVal GRSimpleVals::EvalCast(GRExprEngine& Eng, NonLVal X, QualType T) { + + if (!isa<nonlval::ConcreteInt>(X)) + return UnknownVal(); + + BasicValueFactory& BasicVals = Eng.getBasicVals(); + + llvm::APSInt V = cast<nonlval::ConcreteInt>(X).getValue(); + V.setIsUnsigned(T->isUnsignedIntegerType() || T->isPointerType() + || T->isObjCQualifiedIdType()); + V.extOrTrunc(Eng.getContext().getTypeSize(T)); + + if (T->isPointerType()) + return lval::ConcreteInt(BasicVals.getValue(V)); + else + return nonlval::ConcreteInt(BasicVals.getValue(V)); +} + +// Casts. + +RVal GRSimpleVals::EvalCast(GRExprEngine& Eng, LVal X, QualType T) { + + if (T->isPointerType() || T->isReferenceType() || T->isObjCQualifiedIdType()) + return X; + + assert (T->isIntegerType()); + + if (!isa<lval::ConcreteInt>(X)) + return UnknownVal(); + + BasicValueFactory& BasicVals = Eng.getBasicVals(); + + llvm::APSInt V = cast<lval::ConcreteInt>(X).getValue(); + V.setIsUnsigned(T->isUnsignedIntegerType() || T->isPointerType()); + V.extOrTrunc(Eng.getContext().getTypeSize(T)); + + return nonlval::ConcreteInt(BasicVals.getValue(V)); +} + +// Unary operators. 
+ +RVal GRSimpleVals::EvalMinus(GRExprEngine& Eng, UnaryOperator* U, NonLVal X){ + + switch (X.getSubKind()) { + + case nonlval::ConcreteIntKind: + return cast<nonlval::ConcreteInt>(X).EvalMinus(Eng.getBasicVals(), U); + + default: + return UnknownVal(); + } +} + +RVal GRSimpleVals::EvalComplement(GRExprEngine& Eng, NonLVal X) { + + switch (X.getSubKind()) { + + case nonlval::ConcreteIntKind: + return cast<nonlval::ConcreteInt>(X).EvalComplement(Eng.getBasicVals()); + + default: + return UnknownVal(); + } +} + +// Binary operators. + +RVal GRSimpleVals::EvalBinOp(GRExprEngine& Eng, BinaryOperator::Opcode Op, + NonLVal L, NonLVal R) { + + BasicValueFactory& BasicVals = Eng.getBasicVals(); + + while (1) { + + switch (L.getSubKind()) { + default: + return UnknownVal(); + + case nonlval::ConcreteIntKind: + + if (isa<nonlval::ConcreteInt>(R)) { + const nonlval::ConcreteInt& L_CI = cast<nonlval::ConcreteInt>(L); + const nonlval::ConcreteInt& R_CI = cast<nonlval::ConcreteInt>(R); + return L_CI.EvalBinOp(BasicVals, Op, R_CI); + } + else { + NonLVal tmp = R; + R = L; + L = tmp; + continue; + } + + case nonlval::SymbolValKind: { + + if (isa<nonlval::ConcreteInt>(R)) { + const SymIntConstraint& C = + BasicVals.getConstraint(cast<nonlval::SymbolVal>(L).getSymbol(), Op, + cast<nonlval::ConcreteInt>(R).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + else + return UnknownVal(); + } + } + } +} + + +// Binary Operators (except assignments and comma). + +RVal GRSimpleVals::EvalBinOp(GRExprEngine& Eng, BinaryOperator::Opcode Op, + LVal L, LVal R) { + + switch (Op) { + + default: + return UnknownVal(); + + case BinaryOperator::EQ: + return EvalEQ(Eng, L, R); + + case BinaryOperator::NE: + return EvalNE(Eng, L, R); + } +} + +// Pointer arithmetic. + +RVal GRSimpleVals::EvalBinOp(GRExprEngine& Eng, BinaryOperator::Opcode Op, + LVal L, NonLVal R) { + return UnknownVal(); +} + +// Equality operators for LVals. 
+ +RVal GRSimpleVals::EvalEQ(GRExprEngine& Eng, LVal L, LVal R) { + + BasicValueFactory& BasicVals = Eng.getBasicVals(); + + switch (L.getSubKind()) { + + default: + assert(false && "EQ not implemented for this LVal."); + return UnknownVal(); + + case lval::ConcreteIntKind: + + if (isa<lval::ConcreteInt>(R)) { + bool b = cast<lval::ConcreteInt>(L).getValue() == + cast<lval::ConcreteInt>(R).getValue(); + + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + else if (isa<lval::SymbolVal>(R)) { + + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(R).getSymbol(), + BinaryOperator::EQ, + cast<lval::ConcreteInt>(L).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + break; + + case lval::SymbolValKind: { + + if (isa<lval::ConcreteInt>(R)) { + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(L).getSymbol(), + BinaryOperator::EQ, + cast<lval::ConcreteInt>(R).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + // FIXME: Implement == for lval Symbols. This is mainly useful + // in iterator loops when traversing a buffer, e.g. while(z != zTerm). + // Since this is not useful for many checkers we'll punt on this for + // now. 
+ + return UnknownVal(); + } + + case lval::DeclValKind: + case lval::FuncValKind: + case lval::GotoLabelKind: + return NonLVal::MakeIntTruthVal(BasicVals, L == R); + } + + return NonLVal::MakeIntTruthVal(BasicVals, false); +} + +RVal GRSimpleVals::EvalNE(GRExprEngine& Eng, LVal L, LVal R) { + + BasicValueFactory& BasicVals = Eng.getBasicVals(); + + switch (L.getSubKind()) { + + default: + assert(false && "NE not implemented for this LVal."); + return UnknownVal(); + + case lval::ConcreteIntKind: + + if (isa<lval::ConcreteInt>(R)) { + bool b = cast<lval::ConcreteInt>(L).getValue() != + cast<lval::ConcreteInt>(R).getValue(); + + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + else if (isa<lval::SymbolVal>(R)) { + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(R).getSymbol(), + BinaryOperator::NE, + cast<lval::ConcreteInt>(L).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + break; + + case lval::SymbolValKind: { + if (isa<lval::ConcreteInt>(R)) { + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(L).getSymbol(), + BinaryOperator::NE, + cast<lval::ConcreteInt>(R).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + // FIXME: Implement != for lval Symbols. This is mainly useful + // in iterator loops when traversing a buffer, e.g. while(z != zTerm). + // Since this is not useful for many checkers we'll punt on this for + // now. + + return UnknownVal(); + + break; + } + + case lval::DeclValKind: + case lval::FuncValKind: + case lval::GotoLabelKind: + return NonLVal::MakeIntTruthVal(BasicVals, L != R); + } + + return NonLVal::MakeIntTruthVal(BasicVals, true); +} + +//===----------------------------------------------------------------------===// +// Transfer function for Function Calls. 
+//===----------------------------------------------------------------------===// + +void GRSimpleVals::EvalCall(ExplodedNodeSet<ValueState>& Dst, + GRExprEngine& Eng, + GRStmtNodeBuilder<ValueState>& Builder, + CallExpr* CE, LVal L, + ExplodedNode<ValueState>* Pred) { + + ValueStateManager& StateMgr = Eng.getStateManager(); + ValueState* St = Builder.GetState(Pred); + + // Invalidate all arguments passed in by reference (LVals). + + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + + RVal V = StateMgr.GetRVal(St, *I); + + if (isa<LVal>(V)) + St = StateMgr.SetRVal(St, cast<LVal>(V), UnknownVal()); + } + + // Make up a symbol for the return value of this function. + + if (CE->getType() != Eng.getContext().VoidTy) { + unsigned Count = Builder.getCurrentBlockCount(); + SymbolID Sym = Eng.getSymbolManager().getConjuredSymbol(CE, Count); + + RVal X = CE->getType()->isPointerType() + ? cast<RVal>(lval::SymbolVal(Sym)) + : cast<RVal>(nonlval::SymbolVal(Sym)); + + St = StateMgr.SetRVal(St, CE, X, Eng.getCFG().isBlkExpr(CE), false); + } + + Builder.Nodify(Dst, CE, Pred, St); +} diff --git a/clang/lib/Analysis/GRSimpleVals.h b/clang/lib/Analysis/GRSimpleVals.h new file mode 100644 index 00000000000..2b3d0fd00a2 --- /dev/null +++ b/clang/lib/Analysis/GRSimpleVals.h @@ -0,0 +1,71 @@ +// GRSimpleVals.h - Transfer functions for tracking simple values -*- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines GRSimpleVals, a sub-class of GRTransferFuncs that +// provides transfer functions for performing simple value tracking with +// limited support for symbolics. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_GRSIMPLEVALS +#define LLVM_CLANG_ANALYSIS_GRSIMPLEVALS + +#include "clang/Analysis/PathSensitive/GRTransferFuncs.h" +#include "clang/Analysis/PathSensitive/GRExprEngine.h" + +namespace clang { + +class GRSimpleVals : public GRTransferFuncs { +public: + GRSimpleVals() {} + virtual ~GRSimpleVals() {} + + // Casts. + + virtual RVal EvalCast(GRExprEngine& Engine, NonLVal V, QualType CastT); + virtual RVal EvalCast(GRExprEngine& Engine, LVal V, QualType CastT); + + // Unary Operators. + + virtual RVal EvalMinus(GRExprEngine& Engine, UnaryOperator* U, NonLVal X); + + virtual RVal EvalComplement(GRExprEngine& Engine, NonLVal X); + + // Binary Operators. + + virtual RVal EvalBinOp(GRExprEngine& Engine, BinaryOperator::Opcode Op, + NonLVal L, NonLVal R); + + virtual RVal EvalBinOp(GRExprEngine& Engine, BinaryOperator::Opcode Op, + LVal L, LVal R); + + // Pointer arithmetic. + + virtual RVal EvalBinOp(GRExprEngine& Engine, BinaryOperator::Opcode Op, + LVal L, NonLVal R); + + // Calls. + + virtual void EvalCall(ExplodedNodeSet<ValueState>& Dst, + GRExprEngine& Engine, + GRStmtNodeBuilder<ValueState>& Builder, + CallExpr* CE, LVal L, + ExplodedNode<ValueState>* Pred); + +protected: + + // Equality operators for LVals. + + RVal EvalEQ(GRExprEngine& Engine, LVal L, LVal R); + RVal EvalNE(GRExprEngine& Engine, LVal L, LVal R); +}; + +} // end clang namespace + +#endif diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp new file mode 100644 index 00000000000..e59a4885911 --- /dev/null +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -0,0 +1,246 @@ +//=- LiveVariables.cpp - Live Variable Analysis for Source CFGs -*- C++ --*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements Live Variables analysis for source-level CFGs. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Basic/SourceManager.h" +#include "clang/AST/Expr.h" +#include "clang/AST/CFG.h" +#include "clang/Analysis/Visitors/CFGRecStmtDeclVisitor.h" +#include "clang/Analysis/FlowSensitive/DataflowSolver.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Compiler.h" + +#include <string.h> +#include <stdio.h> + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Dataflow initialization logic. +//===----------------------------------------------------------------------===// + +namespace { +class VISIBILITY_HIDDEN RegisterDecls + : public CFGRecStmtDeclVisitor<RegisterDecls> { + + LiveVariables::AnalysisDataTy& AD; +public: + RegisterDecls(LiveVariables::AnalysisDataTy& ad) : AD(ad) {} + void VisitVarDecl(VarDecl* VD) { AD.Register(VD); } + CFG& getCFG() { return AD.getCFG(); } +}; +} // end anonymous namespace + + +LiveVariables::LiveVariables(CFG& cfg) { + // Register all referenced VarDecls. + getAnalysisData().setCFG(&cfg); + RegisterDecls R(getAnalysisData()); + cfg.VisitBlockStmts(R); +} + +//===----------------------------------------------------------------------===// +// Transfer functions. 
+//===----------------------------------------------------------------------===// + +namespace { + +static const bool Alive = true; +static const bool Dead = false; + +class VISIBILITY_HIDDEN TransferFuncs : public CFGRecStmtVisitor<TransferFuncs>{ + LiveVariables::AnalysisDataTy& AD; + LiveVariables::ValTy LiveState; +public: + TransferFuncs(LiveVariables::AnalysisDataTy& ad) : AD(ad) {} + + LiveVariables::ValTy& getVal() { return LiveState; } + CFG& getCFG() { return AD.getCFG(); } + + void VisitDeclRefExpr(DeclRefExpr* DR); + void VisitBinaryOperator(BinaryOperator* B); + void VisitAssign(BinaryOperator* B); + void VisitDeclStmt(DeclStmt* DS); + void VisitUnaryOperator(UnaryOperator* U); + void Visit(Stmt *S); +}; + +void TransferFuncs::Visit(Stmt *S) { + if (AD.Observer) + AD.Observer->ObserveStmt(S,AD,LiveState); + + if (S == getCurrentBlkStmt()) { + if (getCFG().isBlkExpr(S)) LiveState(S,AD) = Dead; + StmtVisitor<TransferFuncs,void>::Visit(S); + } + else if (!getCFG().isBlkExpr(S)) + StmtVisitor<TransferFuncs,void>::Visit(S); + else + // For block-level expressions, mark that they are live. + LiveState(S,AD) = Alive; +} + +void TransferFuncs::VisitDeclRefExpr(DeclRefExpr* DR) { + if (VarDecl* V = dyn_cast<VarDecl>(DR->getDecl())) + LiveState(V,AD) = Alive; +} + +void TransferFuncs::VisitBinaryOperator(BinaryOperator* B) { + if (B->isAssignmentOp()) VisitAssign(B); + else VisitStmt(B); +} + +void TransferFuncs::VisitUnaryOperator(UnaryOperator* U) { + Expr *E = U->getSubExpr(); + + switch (U->getOpcode()) { + case UnaryOperator::SizeOf: return; + case UnaryOperator::PostInc: + case UnaryOperator::PostDec: + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + case UnaryOperator::AddrOf: + // Walk through the subexpressions, blasting through ParenExprs + // until we either find a DeclRefExpr or some non-DeclRefExpr + // expression. 
+ if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E->IgnoreParens())) + if (VarDecl* VD = dyn_cast<VarDecl>(DR->getDecl())) { + // Treat the --/++/& operator as a kill. + LiveState(VD, AD) = Dead; + if (AD.Observer) { AD.Observer->ObserverKill(DR); } + return VisitDeclRefExpr(DR); + } + + // Fall-through. + + default: + return Visit(E); + } +} + +void TransferFuncs::VisitAssign(BinaryOperator* B) { + Expr* LHS = B->getLHS(); + + // Assigning to a variable? + if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(LHS->IgnoreParens())) { + LiveState(DR->getDecl(),AD) = Dead; + if (AD.Observer) { AD.Observer->ObserverKill(DR); } + + // Handle things like +=, etc., which also generate "uses" + // of a variable. Do this just by visiting the subexpression. + if (B->getOpcode() != BinaryOperator::Assign) + VisitDeclRefExpr(DR); + } + else // Not assigning to a variable. Process LHS as usual. + Visit(LHS); + + Visit(B->getRHS()); +} + +void TransferFuncs::VisitDeclStmt(DeclStmt* DS) { + // Declarations effectively "kill" a variable since they cannot + // possibly be live before they are declared. + for (ScopedDecl* D = DS->getDecl(); D != NULL; D = D->getNextDeclarator()) + if (VarDecl* VD = dyn_cast<VarDecl>(D)) { + LiveState(D,AD) = Dead; + + if (Expr* Init = VD->getInit()) + Visit(Init); + } +} + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Merge operator: if something is live on any successor block, it is live +// in the current block (a set union). +//===----------------------------------------------------------------------===// + +namespace { +typedef ExprDeclBitVector_Types::Union Merge; +typedef DataflowSolver<LiveVariables,TransferFuncs,Merge> Solver; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// External interface to run Liveness analysis. 
+//===----------------------------------------------------------------------===// + +void LiveVariables::runOnCFG(CFG& cfg) { + Solver S(*this); + S.runOnCFG(cfg); +} + +void LiveVariables::runOnAllBlocks(const CFG& cfg, + LiveVariables::ObserverTy* Obs, + bool recordStmtValues) { + Solver S(*this); + ObserverTy* OldObserver = getAnalysisData().Observer; + getAnalysisData().Observer = Obs; + S.runOnAllBlocks(cfg, recordStmtValues); + getAnalysisData().Observer = OldObserver; +} + +//===----------------------------------------------------------------------===// +// liveness queries +// + +bool LiveVariables::isLive(const CFGBlock* B, const VarDecl* D) const { + DeclBitVector_Types::Idx i = getAnalysisData().getIdx(D); + return i.isValid() ? getBlockData(B).getBit(i) : false; +} + +bool LiveVariables::isLive(const ValTy& Live, const VarDecl* D) const { + DeclBitVector_Types::Idx i = getAnalysisData().getIdx(D); + return i.isValid() ? Live.getBit(i) : false; +} + +bool LiveVariables::isLive(const Stmt* Loc, const Stmt* StmtVal) const { + return getStmtData(Loc)(StmtVal,getAnalysisData()); +} + +bool LiveVariables::isLive(const Stmt* Loc, const VarDecl* D) const { + return getStmtData(Loc)(D,getAnalysisData()); +} + +//===----------------------------------------------------------------------===// +// printing liveness state for debugging +// + +void LiveVariables::dumpLiveness(const ValTy& V, SourceManager& SM) const { + const AnalysisDataTy& AD = getAnalysisData(); + + for (AnalysisDataTy::decl_iterator I = AD.begin_decl(), + E = AD.end_decl(); I!=E; ++I) + if (V.getDeclBit(I->second)) { + SourceLocation PhysLoc = SM.getPhysicalLoc(I->first->getLocation()); + + fprintf(stderr, " %s <%s:%u:%u>\n", + I->first->getIdentifier()->getName(), + SM.getSourceName(PhysLoc), + SM.getLineNumber(PhysLoc), + SM.getColumnNumber(PhysLoc)); + } +} + +void LiveVariables::dumpBlockLiveness(SourceManager& M) const { + for (BlockDataMapTy::iterator I = getBlockDataMap().begin(), + E = 
getBlockDataMap().end(); I!=E; ++I) { + fprintf(stderr, "\n[ B%d (live variables at block exit) ]\n", + I->first->getBlockID()); + + dumpLiveness(I->second,M); + } + + fprintf(stderr,"\n"); +} diff --git a/clang/lib/Analysis/Makefile b/clang/lib/Analysis/Makefile new file mode 100644 index 00000000000..b1d91781823 --- /dev/null +++ b/clang/lib/Analysis/Makefile @@ -0,0 +1,22 @@ +##===- clang/lib/Analysis/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements analyses built on top of source-level CFGs. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME := clangAnalysis +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp new file mode 100644 index 00000000000..c089e486988 --- /dev/null +++ b/clang/lib/Analysis/ProgramPoint.cpp @@ -0,0 +1,65 @@ +//= ProgramPoint.cpp - Program Points for Path-Sensitive Analysis --*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements methods for subclasses of ProgramPoint. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CFG.h" +#include "clang/Analysis/ProgramPoint.h" + +using namespace clang; + +BlockEdge::BlockEdge(CFG& cfg, const CFGBlock* B1, const CFGBlock* B2) { + if (B1->succ_size() == 1) { + assert (*(B1->succ_begin()) == B2); + Data = reinterpret_cast<uintptr_t>(B1) | BlockEdgeSrcKind; + } + else if (B2->pred_size() == 1) { + assert (*(B2->pred_begin()) == B1); + Data = reinterpret_cast<uintptr_t>(B2) | BlockEdgeDstKind; + } + else + Data = reinterpret_cast<uintptr_t>(cfg.getBlockEdgeImpl(B1,B2)) + | BlockEdgeAuxKind; +} + +CFGBlock* BlockEdge::getSrc() const { + switch (getKind()) { + default: + assert (false && "Invalid BlockEdgeKind."); + return NULL; + + case BlockEdgeSrcKind: + return reinterpret_cast<CFGBlock*>(getRawPtr()); + + case BlockEdgeDstKind: + return *(reinterpret_cast<CFGBlock*>(getRawPtr())->pred_begin()); + + case BlockEdgeAuxKind: + return reinterpret_cast<BPair*>(getRawPtr())->first; + } +} + +CFGBlock* BlockEdge::getDst() const { + switch (getKind()) { + default: + assert (false && "Invalid BlockEdgeKind."); + return NULL; + + case BlockEdgeSrcKind: + return *(reinterpret_cast<CFGBlock*>(getRawPtr())->succ_begin()); + + case BlockEdgeDstKind: + return reinterpret_cast<CFGBlock*>(getRawPtr()); + + case BlockEdgeAuxKind: + return reinterpret_cast<BPair*>(getRawPtr())->second; + } +} diff --git a/clang/lib/Analysis/RValues.cpp b/clang/lib/Analysis/RValues.cpp new file mode 100644 index 00000000000..a4b464949aa --- /dev/null +++ b/clang/lib/Analysis/RValues.cpp @@ -0,0 +1,389 @@ +//= RValues.cpp - Abstract RValues for Path-Sens. Value Tracking -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines RVal, LVal, and NonLVal, classes that represent +// abstract r-values for use with path-sensitive value tracking. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/RValues.h" +#include "llvm/Support/Streams.h" + +using namespace clang; +using llvm::dyn_cast; +using llvm::cast; +using llvm::APSInt; + +//===----------------------------------------------------------------------===// +// Symbol Iteration. +//===----------------------------------------------------------------------===// + +RVal::symbol_iterator RVal::symbol_begin() const { + if (isa<lval::SymbolVal>(this)) + return (symbol_iterator) (&Data); + else if (isa<nonlval::SymbolVal>(this)) + return (symbol_iterator) (&Data); + else if (isa<nonlval::SymIntConstraintVal>(this)) { + const SymIntConstraint& C = + cast<nonlval::SymIntConstraintVal>(this)->getConstraint(); + + return (symbol_iterator) &C.getSymbol(); + } + + return NULL; +} + +RVal::symbol_iterator RVal::symbol_end() const { + symbol_iterator X = symbol_begin(); + return X ? X+1 : NULL; +} + +//===----------------------------------------------------------------------===// +// Transfer function dispatch for Non-LVals. +//===----------------------------------------------------------------------===// + +RVal +nonlval::ConcreteInt::EvalBinOp(BasicValueFactory& BasicVals, BinaryOperator::Opcode Op, + const nonlval::ConcreteInt& R) const { + + const llvm::APSInt* X = BasicVals.EvaluateAPSInt(Op, getValue(), R.getValue()); + + if (X) + return nonlval::ConcreteInt(*X); + else + return UndefinedVal(); +} + + + // Bitwise-Complement. + +nonlval::ConcreteInt +nonlval::ConcreteInt::EvalComplement(BasicValueFactory& BasicVals) const { + return BasicVals.getValue(~getValue()); +} + + // Unary Minus. 
+ +nonlval::ConcreteInt +nonlval::ConcreteInt::EvalMinus(BasicValueFactory& BasicVals, UnaryOperator* U) const { + assert (U->getType() == U->getSubExpr()->getType()); + assert (U->getType()->isIntegerType()); + return BasicVals.getValue(-getValue()); +} + +//===----------------------------------------------------------------------===// +// Transfer function dispatch for LVals. +//===----------------------------------------------------------------------===// + +RVal +lval::ConcreteInt::EvalBinOp(BasicValueFactory& BasicVals, BinaryOperator::Opcode Op, + const lval::ConcreteInt& R) const { + + assert (Op == BinaryOperator::Add || Op == BinaryOperator::Sub || + (Op >= BinaryOperator::LT && Op <= BinaryOperator::NE)); + + const llvm::APSInt* X = BasicVals.EvaluateAPSInt(Op, getValue(), R.getValue()); + + if (X) + return lval::ConcreteInt(*X); + else + return UndefinedVal(); +} + +NonLVal LVal::EQ(BasicValueFactory& BasicVals, const LVal& R) const { + + switch (getSubKind()) { + default: + assert(false && "EQ not implemented for this LVal."); + break; + + case lval::ConcreteIntKind: + if (isa<lval::ConcreteInt>(R)) { + bool b = cast<lval::ConcreteInt>(this)->getValue() == + cast<lval::ConcreteInt>(R).getValue(); + + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + else if (isa<lval::SymbolVal>(R)) { + + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(R).getSymbol(), + BinaryOperator::EQ, + cast<lval::ConcreteInt>(this)->getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + break; + + case lval::SymbolValKind: { + if (isa<lval::ConcreteInt>(R)) { + + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(this)->getSymbol(), + BinaryOperator::EQ, + cast<lval::ConcreteInt>(R).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + assert (!isa<lval::SymbolVal>(R) && "FIXME: Implement unification."); + + break; + } + + case lval::DeclValKind: + if (isa<lval::DeclVal>(R)) { + bool b = 
cast<lval::DeclVal>(*this) == cast<lval::DeclVal>(R); + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + + break; + } + + return NonLVal::MakeIntTruthVal(BasicVals, false); +} + +NonLVal LVal::NE(BasicValueFactory& BasicVals, const LVal& R) const { + switch (getSubKind()) { + default: + assert(false && "NE not implemented for this LVal."); + break; + + case lval::ConcreteIntKind: + if (isa<lval::ConcreteInt>(R)) { + bool b = cast<lval::ConcreteInt>(this)->getValue() != + cast<lval::ConcreteInt>(R).getValue(); + + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + else if (isa<lval::SymbolVal>(R)) { + + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(R).getSymbol(), + BinaryOperator::NE, + cast<lval::ConcreteInt>(this)->getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + break; + + case lval::SymbolValKind: { + if (isa<lval::ConcreteInt>(R)) { + + const SymIntConstraint& C = + BasicVals.getConstraint(cast<lval::SymbolVal>(this)->getSymbol(), + BinaryOperator::NE, + cast<lval::ConcreteInt>(R).getValue()); + + return nonlval::SymIntConstraintVal(C); + } + + assert (!isa<lval::SymbolVal>(R) && "FIXME: Implement sym !=."); + + break; + } + + case lval::DeclValKind: + if (isa<lval::DeclVal>(R)) { + bool b = cast<lval::DeclVal>(*this) == cast<lval::DeclVal>(R); + return NonLVal::MakeIntTruthVal(BasicVals, b); + } + + break; + } + + return NonLVal::MakeIntTruthVal(BasicVals, true); +} + +//===----------------------------------------------------------------------===// +// Utility methods for constructing Non-LVals. 
+//===----------------------------------------------------------------------===// + +NonLVal NonLVal::MakeVal(BasicValueFactory& BasicVals, uint64_t X, QualType T) { + return nonlval::ConcreteInt(BasicVals.getValue(X, T)); +} + +NonLVal NonLVal::MakeVal(BasicValueFactory& BasicVals, IntegerLiteral* I) { + + return nonlval::ConcreteInt(BasicVals.getValue(APSInt(I->getValue(), + I->getType()->isUnsignedIntegerType()))); +} + +NonLVal NonLVal::MakeIntTruthVal(BasicValueFactory& BasicVals, bool b) { + return nonlval::ConcreteInt(BasicVals.getTruthValue(b)); +} + +RVal RVal::GetSymbolValue(SymbolManager& SymMgr, VarDecl* D) { + + QualType T = D->getType(); + + if (T->isPointerType() || T->isReferenceType()) + return lval::SymbolVal(SymMgr.getSymbol(D)); + else + return nonlval::SymbolVal(SymMgr.getSymbol(D)); +} + +//===----------------------------------------------------------------------===// +// Utility methods for constructing LVals. +//===----------------------------------------------------------------------===// + +LVal LVal::MakeVal(AddrLabelExpr* E) { return lval::GotoLabel(E->getLabel()); } + +//===----------------------------------------------------------------------===// +// Utility methods for constructing RVals (both NonLVals and LVals). +//===----------------------------------------------------------------------===// + +RVal RVal::MakeVal(BasicValueFactory& BasicVals, DeclRefExpr* E) { + + ValueDecl* D = cast<DeclRefExpr>(E)->getDecl(); + + if (VarDecl* VD = dyn_cast<VarDecl>(D)) { + return lval::DeclVal(VD); + } + else if (EnumConstantDecl* ED = dyn_cast<EnumConstantDecl>(D)) { + + // FIXME: Do we need to cache a copy of this enum, since it + // already has persistent storage? We do this because we + // are comparing states using pointer equality. Perhaps there is + // a better way, since APInts are fairly lightweight. 
+ + return nonlval::ConcreteInt(BasicVals.getValue(ED->getInitVal())); + } + else if (FunctionDecl* FD = dyn_cast<FunctionDecl>(D)) { + return lval::FuncVal(FD); + } + + assert (false && + "ValueDecl support for this ValueDecl not implemented."); + + return UnknownVal(); +} + +//===----------------------------------------------------------------------===// +// Pretty-Printing. +//===----------------------------------------------------------------------===// + +void RVal::printStdErr() const { print(*llvm::cerr.stream()); } + +void RVal::print(std::ostream& Out) const { + + switch (getBaseKind()) { + + case UnknownKind: + Out << "Invalid"; break; + + case NonLValKind: + cast<NonLVal>(this)->print(Out); break; + + case LValKind: + cast<LVal>(this)->print(Out); break; + + case UndefinedKind: + Out << "Undefined"; break; + + default: + assert (false && "Invalid RVal."); + } +} + +static void printOpcode(std::ostream& Out, BinaryOperator::Opcode Op) { + + switch (Op) { + case BinaryOperator::Mul: Out << '*' ; break; + case BinaryOperator::Div: Out << '/' ; break; + case BinaryOperator::Rem: Out << '%' ; break; + case BinaryOperator::Add: Out << '+' ; break; + case BinaryOperator::Sub: Out << '-' ; break; + case BinaryOperator::Shl: Out << "<<" ; break; + case BinaryOperator::Shr: Out << ">>" ; break; + case BinaryOperator::LT: Out << "<" ; break; + case BinaryOperator::GT: Out << '>' ; break; + case BinaryOperator::LE: Out << "<=" ; break; + case BinaryOperator::GE: Out << ">=" ; break; + case BinaryOperator::EQ: Out << "==" ; break; + case BinaryOperator::NE: Out << "!=" ; break; + case BinaryOperator::And: Out << '&' ; break; + case BinaryOperator::Xor: Out << '^' ; break; + case BinaryOperator::Or: Out << '|' ; break; + + default: assert(false && "Not yet implemented."); + } +} + +void NonLVal::print(std::ostream& Out) const { + + switch (getSubKind()) { + + case nonlval::ConcreteIntKind: + Out << cast<nonlval::ConcreteInt>(this)->getValue().toString(); + + if 
(cast<nonlval::ConcreteInt>(this)->getValue().isUnsigned()) + Out << 'U'; + + break; + + case nonlval::SymbolValKind: + Out << '$' << cast<nonlval::SymbolVal>(this)->getSymbol(); + break; + + case nonlval::SymIntConstraintValKind: { + const nonlval::SymIntConstraintVal& C = + *cast<nonlval::SymIntConstraintVal>(this); + + Out << '$' << C.getConstraint().getSymbol() << ' '; + printOpcode(Out, C.getConstraint().getOpcode()); + Out << ' ' << C.getConstraint().getInt().toString(); + + if (C.getConstraint().getInt().isUnsigned()) + Out << 'U'; + + break; + } + + default: + assert (false && "Pretty-printed not implemented for this NonLVal."); + break; + } +} + +void LVal::print(std::ostream& Out) const { + + switch (getSubKind()) { + + case lval::ConcreteIntKind: + Out << cast<lval::ConcreteInt>(this)->getValue().toString() + << " (LVal)"; + break; + + case lval::SymbolValKind: + Out << '$' << cast<lval::SymbolVal>(this)->getSymbol(); + break; + + case lval::GotoLabelKind: + Out << "&&" + << cast<lval::GotoLabel>(this)->getLabel()->getID()->getName(); + break; + + case lval::DeclValKind: + Out << '&' + << cast<lval::DeclVal>(this)->getDecl()->getIdentifier()->getName(); + break; + + case lval::FuncValKind: + Out << "function " + << cast<lval::FuncVal>(this)->getDecl()->getIdentifier()->getName(); + break; + + default: + assert (false && "Pretty-printing not implemented for this LVal."); + break; + } +} diff --git a/clang/lib/Analysis/SymbolManager.cpp b/clang/lib/Analysis/SymbolManager.cpp new file mode 100644 index 00000000000..f243fa667b3 --- /dev/null +++ b/clang/lib/Analysis/SymbolManager.cpp @@ -0,0 +1,124 @@ +//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines SymbolManager, a class that manages symbolic values +// created for use by GRExprEngine and related classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/SymbolManager.h" + +using namespace clang; + +SymbolID SymbolManager::getSymbol(VarDecl* D) { + + assert (isa<ParmVarDecl>(D) || D->hasGlobalStorage()); + + llvm::FoldingSetNodeID profile; + + ParmVarDecl* PD = dyn_cast<ParmVarDecl>(D); + + if (PD) + SymbolDataParmVar::Profile(profile, PD); + else + SymbolDataGlobalVar::Profile(profile, D); + + void* InsertPos; + + SymbolData* SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + + if (SD) + return SD->getSymbol(); + + if (PD) { + SD = (SymbolData*) BPAlloc.Allocate<SymbolDataParmVar>(); + new (SD) SymbolDataParmVar(SymbolCounter, PD); + } + else { + SD = (SymbolData*) BPAlloc.Allocate<SymbolDataGlobalVar>(); + new (SD) SymbolDataGlobalVar(SymbolCounter, D); + } + + DataSet.InsertNode(SD, InsertPos); + + DataMap[SymbolCounter] = SD; + return SymbolCounter++; +} + +SymbolID SymbolManager::getContentsOfSymbol(SymbolID sym) { + + llvm::FoldingSetNodeID profile; + SymbolDataContentsOf::Profile(profile, sym); + void* InsertPos; + + SymbolData* SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + + if (SD) + return SD->getSymbol(); + + SD = (SymbolData*) BPAlloc.Allocate<SymbolDataContentsOf>(); + new (SD) SymbolDataContentsOf(SymbolCounter, sym); + + + DataSet.InsertNode(SD, InsertPos); + DataMap[SymbolCounter] = SD; + + return SymbolCounter++; +} + +SymbolID SymbolManager::getConjuredSymbol(Expr* E, unsigned Count) { + + llvm::FoldingSetNodeID profile; + SymbolConjured::Profile(profile, E, Count); + void* InsertPos; + + SymbolData* SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); + + if (SD) + return SD->getSymbol(); + + SD = (SymbolData*) 
BPAlloc.Allocate<SymbolConjured>(); + new (SD) SymbolConjured(SymbolCounter, E, Count); + + DataSet.InsertNode(SD, InsertPos); + DataMap[SymbolCounter] = SD; + + return SymbolCounter++; +} + +const SymbolData& SymbolManager::getSymbolData(SymbolID Sym) const { + DataMapTy::const_iterator I = DataMap.find(Sym); + assert (I != DataMap.end()); + return *I->second; +} + + +QualType SymbolData::getType(const SymbolManager& SymMgr) const { + switch (getKind()) { + default: + assert (false && "getType() not implemented for this symbol."); + + case ParmKind: + return cast<SymbolDataParmVar>(this)->getDecl()->getType(); + + case GlobalKind: + return cast<SymbolDataGlobalVar>(this)->getDecl()->getType(); + + case ContentsOfKind: { + SymbolID x = cast<SymbolDataContentsOf>(this)->getContainerSymbol(); + QualType T = SymMgr.getSymbolData(x).getType(SymMgr); + return T->getAsPointerType()->getPointeeType(); + } + + case ConjuredKind: + return cast<SymbolConjured>(this)->getExpr()->getType(); + } +} + +SymbolManager::~SymbolManager() {} diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp new file mode 100644 index 00000000000..25a5ecb4837 --- /dev/null +++ b/clang/lib/Analysis/UninitializedValues.cpp @@ -0,0 +1,277 @@ +//==- UninitializedValues.cpp - Find Unintialized Values --------*- C++ --*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Uninitialized Values analysis for source-level CFGs. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/UninitializedValues.h" +#include "clang/Analysis/Visitors/CFGRecStmtDeclVisitor.h" +#include "clang/Analysis/LocalCheckers.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/ASTContext.h" +#include "clang/Analysis/FlowSensitive/DataflowSolver.h" +#include "llvm/Support/Compiler.h" + +#include "llvm/ADT/SmallPtrSet.h" + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Dataflow initialization logic. +//===----------------------------------------------------------------------===// + +namespace { + +class VISIBILITY_HIDDEN RegisterDecls + : public CFGRecStmtDeclVisitor<RegisterDecls> { + + UninitializedValues::AnalysisDataTy& AD; +public: + RegisterDecls(UninitializedValues::AnalysisDataTy& ad) : AD(ad) {} + + void VisitBlockVarDecl(BlockVarDecl* VD) { AD.Register(VD); } + CFG& getCFG() { return AD.getCFG(); } +}; + +} // end anonymous namespace + +void UninitializedValues::InitializeValues(const CFG& cfg) { + RegisterDecls R(getAnalysisData()); + cfg.VisitBlockStmts(R); +} + +//===----------------------------------------------------------------------===// +// Transfer functions. 
+//===----------------------------------------------------------------------===// + +namespace { +class VISIBILITY_HIDDEN TransferFuncs + : public CFGStmtVisitor<TransferFuncs,bool> { + + UninitializedValues::ValTy V; + UninitializedValues::AnalysisDataTy& AD; +public: + TransferFuncs(UninitializedValues::AnalysisDataTy& ad) : AD(ad) { + V.resetValues(AD); + } + + UninitializedValues::ValTy& getVal() { return V; } + CFG& getCFG() { return AD.getCFG(); } + + bool VisitDeclRefExpr(DeclRefExpr* DR); + bool VisitBinaryOperator(BinaryOperator* B); + bool VisitUnaryOperator(UnaryOperator* U); + bool VisitStmt(Stmt* S); + bool VisitCallExpr(CallExpr* C); + bool VisitDeclStmt(DeclStmt* D); + bool VisitConditionalOperator(ConditionalOperator* C); + + bool Visit(Stmt *S); + bool BlockStmt_VisitExpr(Expr* E); + + BlockVarDecl* FindBlockVarDecl(Stmt* S); +}; + +static const bool Initialized = true; +static const bool Uninitialized = false; + +bool TransferFuncs::VisitDeclRefExpr(DeclRefExpr* DR) { + if (BlockVarDecl* VD = dyn_cast<BlockVarDecl>(DR->getDecl())) { + if (AD.Observer) AD.Observer->ObserveDeclRefExpr(V,AD,DR,VD); + + // Pseudo-hack to prevent cascade of warnings. If an accessed variable + // is uninitialized, then we are already going to flag a warning for + // this variable, which a "source" of uninitialized values. + // We can otherwise do a full "taint" of uninitialized values. The + // client has both options by toggling AD.FullUninitTaint. + + return AD.FullUninitTaint ? 
V(VD,AD) : Initialized; + } + else return Initialized; +} + +BlockVarDecl* TransferFuncs::FindBlockVarDecl(Stmt *S) { + for (;;) + if (ParenExpr* P = dyn_cast<ParenExpr>(S)) { + S = P->getSubExpr(); continue; + } + else if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(S)) { + if (BlockVarDecl* VD = dyn_cast<BlockVarDecl>(DR->getDecl())) + return VD; + else + return NULL; + } + else return NULL; +} + +bool TransferFuncs::VisitBinaryOperator(BinaryOperator* B) { + if (BlockVarDecl* VD = FindBlockVarDecl(B->getLHS())) + if (B->isAssignmentOp()) { + if (B->getOpcode() == BinaryOperator::Assign) + return V(VD,AD) = Visit(B->getRHS()); + else // Handle +=, -=, *=, etc. We do want '&', not '&&'. + return V(VD,AD) = Visit(B->getLHS()) & Visit(B->getRHS()); + } + + return VisitStmt(B); +} + +bool TransferFuncs::VisitDeclStmt(DeclStmt* S) { + for (ScopedDecl* D = S->getDecl(); D != NULL; D = D->getNextDeclarator()) + if (BlockVarDecl* VD = dyn_cast<BlockVarDecl>(D)) { + if (Stmt* I = VD->getInit()) + V(VD,AD) = AD.FullUninitTaint ? V(cast<Expr>(I),AD) : Initialized; + else { + // Special case for declarations of array types. For things like: + // + // char x[10]; + // + // we should treat "x" as being initialized, because the variable + // "x" really refers to the memory block. Clearly x[1] is + // uninitialized, but expressions like "(char *) x" really do refer to + // an initialized value. This simple dataflow analysis does not reason + // about the contents of arrays, although it could be potentially + // extended to do so if the array were of constant size. + if (VD->getType()->isArrayType()) + V(VD,AD) = Initialized; + else + V(VD,AD) = Uninitialized; + } + } + + return Uninitialized; // Value is never consumed. 
+} + +bool TransferFuncs::VisitCallExpr(CallExpr* C) { + VisitChildren(C); + return Initialized; +} + +bool TransferFuncs::VisitUnaryOperator(UnaryOperator* U) { + switch (U->getOpcode()) { + case UnaryOperator::AddrOf: + if (BlockVarDecl* VD = FindBlockVarDecl(U->getSubExpr())) + return V(VD,AD) = Initialized; + + break; + + case UnaryOperator::SizeOf: + return Initialized; + + default: + break; + } + + return Visit(U->getSubExpr()); +} + +bool TransferFuncs::VisitConditionalOperator(ConditionalOperator* C) { + Visit(C->getCond()); + + bool rhsResult = Visit(C->getRHS()); + // Handle the GNU extension for missing LHS. + if (Expr *lhs = C->getLHS()) + return Visit(lhs) & rhsResult; // Yes: we want &, not &&. + else + return rhsResult; +} + +bool TransferFuncs::VisitStmt(Stmt* S) { + bool x = Initialized; + + // We don't stop at the first subexpression that is Uninitialized because + // evaluating some subexpressions may result in propogating "Uninitialized" + // or "Initialized" to variables referenced in the other subexpressions. + for (Stmt::child_iterator I=S->child_begin(), E=S->child_end(); I!=E; ++I) + if (*I && Visit(*I) == Uninitialized) x = Uninitialized; + + return x; +} + +bool TransferFuncs::Visit(Stmt *S) { + if (AD.isTracked(static_cast<Expr*>(S))) return V(static_cast<Expr*>(S),AD); + else return static_cast<CFGStmtVisitor<TransferFuncs,bool>*>(this)->Visit(S); +} + +bool TransferFuncs::BlockStmt_VisitExpr(Expr* E) { + bool x = static_cast<CFGStmtVisitor<TransferFuncs,bool>*>(this)->Visit(E); + if (AD.isTracked(E)) V(E,AD) = x; + return x; +} + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Merge operator. +// +// In our transfer functions we take the approach that any +// combination of unintialized values, e.g. Unitialized + ___ = Unitialized. +// +// Merges take the opposite approach. 
+// +// In the merge of dataflow values we prefer unsoundness, and +// prefer false negatives to false positives. At merges, if a value for a +// tracked Decl is EVER initialized in any of the predecessors we treat it as +// initialized at the confluence point. +//===----------------------------------------------------------------------===// + +namespace { + typedef ExprDeclBitVector_Types::Union Merge; + typedef DataflowSolver<UninitializedValues,TransferFuncs,Merge> Solver; +} + +//===----------------------------------------------------------------------===// +// Unitialized values checker. Scan an AST and flag variable uses +//===----------------------------------------------------------------------===// + +UninitializedValues_ValueTypes::ObserverTy::~ObserverTy() {} + +namespace { +class VISIBILITY_HIDDEN UninitializedValuesChecker + : public UninitializedValues::ObserverTy { + + ASTContext &Ctx; + Diagnostic &Diags; + llvm::SmallPtrSet<BlockVarDecl*,10> AlreadyWarned; + +public: + UninitializedValuesChecker(ASTContext &ctx, Diagnostic &diags) + : Ctx(ctx), Diags(diags) {} + + virtual void ObserveDeclRefExpr(UninitializedValues::ValTy& V, + UninitializedValues::AnalysisDataTy& AD, + DeclRefExpr* DR, BlockVarDecl* VD) { + + assert ( AD.isTracked(VD) && "Unknown VarDecl."); + + if (V(VD,AD) == Uninitialized) + if (AlreadyWarned.insert(VD)) + Diags.Report(Ctx.getFullLoc(DR->getSourceRange().getBegin()), + diag::warn_uninit_val); + } +}; +} // end anonymous namespace + +namespace clang { +void CheckUninitializedValues(CFG& cfg, ASTContext &Ctx, Diagnostic &Diags, + bool FullUninitTaint) { + + // Compute the unitialized values information. + UninitializedValues U(cfg); + U.getAnalysisData().FullUninitTaint = FullUninitTaint; + Solver S(U); + S.runOnCFG(cfg); + + // Scan for DeclRefExprs that use uninitialized values. 
+ UninitializedValuesChecker Observer(Ctx,Diags); + U.getAnalysisData().Observer = &Observer; + S.runOnAllBlocks(cfg); +} +} // end namespace clang diff --git a/clang/lib/Analysis/ValueState.cpp b/clang/lib/Analysis/ValueState.cpp new file mode 100644 index 00000000000..c0ed7aa882a --- /dev/null +++ b/clang/lib/Analysis/ValueState.cpp @@ -0,0 +1,595 @@ +//= ValueState*cpp - Path-Sens. "State" for tracking valuues -----*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines SymbolID, ExprBindKey, and ValueState* +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathSensitive/ValueState.h" +#include "llvm/ADT/SmallSet.h" + +using namespace clang; + +bool ValueState::isNotEqual(SymbolID sym, const llvm::APSInt& V) const { + + // Retrieve the NE-set associated with the given symbol. + ConstNotEqTy::TreeTy* T = ConstNotEq.SlimFind(sym); + + // See if V is present in the NE-set. + return T ? T->getValue().second.contains(&V) : false; +} + +const llvm::APSInt* ValueState::getSymVal(SymbolID sym) const { + ConstEqTy::TreeTy* T = ConstEq.SlimFind(sym); + return T ? T->getValue().second : NULL; +} + +ValueState* +ValueStateManager::RemoveDeadBindings(ValueState* St, Stmt* Loc, + const LiveVariables& Liveness) { + + // This code essentially performs a "mark-and-sweep" of the VariableBindings. + // The roots are any Block-level exprs and Decls that our liveness algorithm + // tells us are live. We then see what Decls they may reference, and keep + // those around. This code more than likely can be made faster, and the + // frequency of which this method is called should be experimented with + // for optimum performance. 
+ + llvm::SmallVector<ValueDecl*, 10> WList; + llvm::SmallPtrSet<ValueDecl*, 10> Marked; + llvm::SmallSet<SymbolID, 20> MarkedSymbols; + + ValueState NewSt = *St; + + // Drop bindings for subexpressions. + NewSt.SubExprBindings = EXFactory.GetEmptyMap(); + + // Iterate over the block-expr bindings. + + for (ValueState::beb_iterator I = St->beb_begin(), E = St->beb_end(); + I!=E ; ++I) { + Expr* BlkExpr = I.getKey(); + + if (Liveness.isLive(Loc, BlkExpr)) { + RVal X = I.getData(); + + if (isa<lval::DeclVal>(X)) { + lval::DeclVal LV = cast<lval::DeclVal>(X); + WList.push_back(LV.getDecl()); + } + + for (RVal::symbol_iterator SI = X.symbol_begin(), SE = X.symbol_end(); + SI != SE; ++SI) { + MarkedSymbols.insert(*SI); + } + } + else { + RVal X = I.getData(); + + if (X.isUndef() && cast<UndefinedVal>(X).getData()) + continue; + + NewSt.BlockExprBindings = Remove(NewSt, BlkExpr); + } + } + + // Iterate over the variable bindings. + + for (ValueState::vb_iterator I = St->vb_begin(), E = St->vb_end(); I!=E ; ++I) + if (Liveness.isLive(Loc, I.getKey())) { + WList.push_back(I.getKey()); + + RVal X = I.getData(); + + for (RVal::symbol_iterator SI = X.symbol_begin(), SE = X.symbol_end(); + SI != SE; ++SI) { + MarkedSymbols.insert(*SI); + } + } + + // Perform the mark-and-sweep. + + while (!WList.empty()) { + + ValueDecl* V = WList.back(); + WList.pop_back(); + + if (Marked.count(V)) + continue; + + Marked.insert(V); + + if (V->getType()->isPointerType()) { + + RVal X = GetRVal(St, lval::DeclVal(cast<VarDecl>(V))); + + if (X.isUnknownOrUndef()) + continue; + + LVal LV = cast<LVal>(X); + + for (RVal::symbol_iterator SI = LV.symbol_begin(), SE = LV.symbol_end(); + SI != SE; ++SI) { + MarkedSymbols.insert(*SI); + } + + if (!isa<lval::DeclVal>(LV)) + continue; + + const lval::DeclVal& LVD = cast<lval::DeclVal>(LV); + WList.push_back(LVD.getDecl()); + } + } + + // Remove dead variable bindings. 
+ for (ValueState::vb_iterator I = St->vb_begin(), E = St->vb_end(); I!=E ; ++I) + if (!Marked.count(I.getKey())) + NewSt.VarBindings = Remove(NewSt, I.getKey()); + + // Remove dead symbols. + for (ValueState::ce_iterator I = St->ce_begin(), E=St->ce_end(); I!=E; ++I) + if (!MarkedSymbols.count(I.getKey())) + NewSt.ConstEq = CEFactory.Remove(NewSt.ConstEq, I.getKey()); + + for (ValueState::cne_iterator I = St->cne_begin(), E=St->cne_end(); I!=E; ++I) + if (!MarkedSymbols.count(I.getKey())) + NewSt.ConstNotEq = CNEFactory.Remove(NewSt.ConstNotEq, I.getKey()); + + return getPersistentState(NewSt); +} + + +RVal ValueStateManager::GetRVal(ValueState* St, LVal LV, QualType T) { + + if (isa<UnknownVal>(LV)) + return UnknownVal(); + + assert (!isa<UndefinedVal>(LV)); + + switch (LV.getSubKind()) { + case lval::DeclValKind: { + ValueState::VarBindingsTy::TreeTy* T = + St->VarBindings.SlimFind(cast<lval::DeclVal>(LV).getDecl()); + + return T ? T->getValue().second : UnknownVal(); + } + + // FIXME: We should limit how far a "ContentsOf" will go... + + case lval::SymbolValKind: { + + + // FIXME: This is a broken representation of memory, and is prone + // to crashing the analyzer when addresses to symbolic values are + // passed through casts. We need a better representation of symbolic + // memory (or just memory in general); probably we should do this + // as a plugin class (similar to GRTransferFuncs). + +#if 0 + const lval::SymbolVal& SV = cast<lval::SymbolVal>(LV); + assert (T.getTypePtr()); + + // Punt on "symbolic" function pointers. 
+ if (T->isFunctionType()) + return UnknownVal(); + + if (T->isPointerType()) + return lval::SymbolVal(SymMgr.getContentsOfSymbol(SV.getSymbol())); + else + return nonlval::SymbolVal(SymMgr.getContentsOfSymbol(SV.getSymbol())); +#endif + + return UnknownVal(); + } + + default: + assert (false && "Invalid LVal."); + break; + } + + return UnknownVal(); +} + +ValueState* ValueStateManager::AddNE(ValueState* St, SymbolID sym, + const llvm::APSInt& V) { + + // First, retrieve the NE-set associated with the given symbol. + ValueState::ConstNotEqTy::TreeTy* T = St->ConstNotEq.SlimFind(sym); + ValueState::IntSetTy S = T ? T->getValue().second : ISetFactory.GetEmptySet(); + + // Now add V to the NE set. + S = ISetFactory.Add(S, &V); + + // Create a new state with the old binding replaced. + ValueState NewSt = *St; + NewSt.ConstNotEq = CNEFactory.Add(NewSt.ConstNotEq, sym, S); + + // Get the persistent copy. + return getPersistentState(NewSt); +} + +ValueState* ValueStateManager::AddEQ(ValueState* St, SymbolID sym, + const llvm::APSInt& V) { + + // Create a new state with the old binding replaced. + ValueState NewSt = *St; + NewSt.ConstEq = CEFactory.Add(NewSt.ConstEq, sym, &V); + + // Get the persistent copy. + return getPersistentState(NewSt); +} + +RVal ValueStateManager::GetRVal(ValueState* St, Expr* E) { + + for (;;) { + + switch (E->getStmtClass()) { + + case Stmt::AddrLabelExprClass: + return LVal::MakeVal(cast<AddrLabelExpr>(E)); + + // ParenExprs are no-ops. + + case Stmt::ParenExprClass: + E = cast<ParenExpr>(E)->getSubExpr(); + continue; + + // DeclRefExprs can either evaluate to an LVal or a Non-LVal + // (assuming an implicit "load") depending on the context. In this + // context we assume that we are retrieving the value contained + // within the referenced variables. + + case Stmt::DeclRefExprClass: { + + // Check if this expression is a block-level expression. If so, + // return its value. 
+ ValueState::ExprBindingsTy::TreeTy* T=St->BlockExprBindings.SlimFind(E); + if (T) return T->getValue().second; + + RVal X = RVal::MakeVal(BasicVals, cast<DeclRefExpr>(E)); + return isa<lval::DeclVal>(X) ? GetRVal(St, cast<lval::DeclVal>(X)) : X; + } + + case Stmt::CharacterLiteralClass: { + CharacterLiteral* C = cast<CharacterLiteral>(E); + return NonLVal::MakeVal(BasicVals, C->getValue(), C->getType()); + } + + case Stmt::IntegerLiteralClass: { + return NonLVal::MakeVal(BasicVals, cast<IntegerLiteral>(E)); + } + + // Casts where the source and target type are the same + // are no-ops. We blast through these to get the descendant + // subexpression that has a value. + + case Stmt::ImplicitCastExprClass: { + ImplicitCastExpr* C = cast<ImplicitCastExpr>(E); + QualType CT = C->getType(); + + if (CT->isVoidType()) + return UnknownVal(); + + QualType ST = C->getSubExpr()->getType(); + + if (CT == ST || (CT->isPointerType() && ST->isFunctionType())) { + E = C->getSubExpr(); + continue; + } + + break; + } + + case Stmt::CastExprClass: { + CastExpr* C = cast<CastExpr>(E); + QualType CT = C->getType(); + QualType ST = C->getSubExpr()->getType(); + + if (CT->isVoidType()) + return UnknownVal(); + + if (CT == ST || (CT->isPointerType() && ST->isFunctionType())) { + E = C->getSubExpr(); + continue; + } + + break; + } + + case Stmt::UnaryOperatorClass: { + + UnaryOperator* U = cast<UnaryOperator>(E); + + if (U->getOpcode() == UnaryOperator::Plus) { + E = U->getSubExpr(); + continue; + } + + break; + } + + // Handle all other Expr* using a lookup. + + default: + break; + }; + + break; + } + + ValueState::ExprBindingsTy::TreeTy* T = St->SubExprBindings.SlimFind(E); + + if (T) + return T->getValue().second; + + T = St->BlockExprBindings.SlimFind(E); + return T ? 
T->getValue().second : UnknownVal(); +} + +RVal ValueStateManager::GetBlkExprRVal(ValueState* St, Expr* E) { + + E = E->IgnoreParens(); + + switch (E->getStmtClass()) { + case Stmt::CharacterLiteralClass: { + CharacterLiteral* C = cast<CharacterLiteral>(E); + return NonLVal::MakeVal(BasicVals, C->getValue(), C->getType()); + } + + case Stmt::IntegerLiteralClass: { + return NonLVal::MakeVal(BasicVals, cast<IntegerLiteral>(E)); + } + + default: { + ValueState::ExprBindingsTy::TreeTy* T = St->BlockExprBindings.SlimFind(E); + return T ? T->getValue().second : UnknownVal(); + } + } +} + +RVal ValueStateManager::GetLVal(ValueState* St, Expr* E) { + + E = E->IgnoreParens(); + + if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E)) { + ValueDecl* VD = DR->getDecl(); + + if (FunctionDecl* FD = dyn_cast<FunctionDecl>(VD)) + return lval::FuncVal(FD); + else + return lval::DeclVal(cast<VarDecl>(DR->getDecl())); + } + + if (UnaryOperator* U = dyn_cast<UnaryOperator>(E)) + if (U->getOpcode() == UnaryOperator::Deref) { + E = U->getSubExpr()->IgnoreParens(); + + if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E)) { + lval::DeclVal X(cast<VarDecl>(DR->getDecl())); + return GetRVal(St, X); + } + else + return GetRVal(St, E); + } + + return GetRVal(St, E); +} + +ValueState* +ValueStateManager::SetRVal(ValueState* St, Expr* E, RVal V, + bool isBlkExpr, bool Invalidate) { + + assert (E); + + if (V.isUnknown()) { + + if (Invalidate) { + + ValueState NewSt = *St; + + if (isBlkExpr) + NewSt.BlockExprBindings = EXFactory.Remove(NewSt.BlockExprBindings, E); + else + NewSt.SubExprBindings = EXFactory.Remove(NewSt.SubExprBindings, E); + + return getPersistentState(NewSt); + } + + return St; + } + + ValueState NewSt = *St; + + if (isBlkExpr) { + NewSt.BlockExprBindings = EXFactory.Add(NewSt.BlockExprBindings, E, V); + } + else { + NewSt.SubExprBindings = EXFactory.Add(NewSt.SubExprBindings, E, V); + } + + return getPersistentState(NewSt); +} + + +ValueState* ValueStateManager::SetRVal(ValueState* St, 
LVal LV, RVal V) { + + switch (LV.getSubKind()) { + + case lval::DeclValKind: + return V.isUnknown() + ? UnbindVar(St, cast<lval::DeclVal>(LV).getDecl()) + : BindVar(St, cast<lval::DeclVal>(LV).getDecl(), V); + + default: + assert ("SetRVal for given LVal type not yet implemented."); + return St; + } +} + +void ValueStateManager::BindVar(ValueState& StImpl, VarDecl* D, RVal V) { + StImpl.VarBindings = VBFactory.Add(StImpl.VarBindings, D, V); +} + +ValueState* ValueStateManager::BindVar(ValueState* St, VarDecl* D, RVal V) { + + // Create a new state with the old binding removed. + ValueState NewSt = *St; + NewSt.VarBindings = VBFactory.Add(NewSt.VarBindings, D, V); + + // Get the persistent copy. + return getPersistentState(NewSt); +} + +ValueState* ValueStateManager::UnbindVar(ValueState* St, VarDecl* D) { + + // Create a new state with the old binding removed. + ValueState NewSt = *St; + NewSt.VarBindings = VBFactory.Remove(NewSt.VarBindings, D); + + // Get the persistent copy. + return getPersistentState(NewSt); +} + +void ValueStateManager::Unbind(ValueState& StImpl, LVal LV) { + + if (isa<lval::DeclVal>(LV)) + StImpl.VarBindings = VBFactory.Remove(StImpl.VarBindings, + cast<lval::DeclVal>(LV).getDecl()); + +} + +ValueState* ValueStateManager::getInitialState() { + + // Create a state with empty variable bindings. 
+ ValueState StateImpl(EXFactory.GetEmptyMap(), + VBFactory.GetEmptyMap(), + CNEFactory.GetEmptyMap(), + CEFactory.GetEmptyMap()); + + return getPersistentState(StateImpl); +} + +ValueState* ValueStateManager::getPersistentState(ValueState& State) { + + llvm::FoldingSetNodeID ID; + State.Profile(ID); + void* InsertPos; + + if (ValueState* I = StateSet.FindNodeOrInsertPos(ID, InsertPos)) + return I; + + ValueState* I = (ValueState*) Alloc.Allocate<ValueState>(); + new (I) ValueState(State); + StateSet.InsertNode(I, InsertPos); + return I; +} + +void ValueState::printDOT(std::ostream& Out, CheckerStatePrinter* P) const { + print(Out, P, "\\l", "\\|"); +} + +void ValueState::printStdErr(CheckerStatePrinter* P) const { + print(*llvm::cerr, P); +} + +void ValueState::print(std::ostream& Out, CheckerStatePrinter* P, + const char* nl, const char* sep) const { + + // Print Variable Bindings + Out << "Variables:" << nl; + + bool isFirst = true; + + for (vb_iterator I = vb_begin(), E = vb_end(); I != E; ++I) { + + if (isFirst) isFirst = false; + else Out << nl; + + Out << ' ' << I.getKey()->getName() << " : "; + I.getData().print(Out); + } + + // Print Subexpression bindings. + + isFirst = true; + + for (seb_iterator I = seb_begin(), E = seb_end(); I != E; ++I) { + + if (isFirst) { + Out << nl << nl << "Sub-Expressions:" << nl; + isFirst = false; + } + else { Out << nl; } + + Out << " (" << (void*) I.getKey() << ") "; + I.getKey()->printPretty(Out); + Out << " : "; + I.getData().print(Out); + } + + // Print block-expression bindings. + + isFirst = true; + + for (beb_iterator I = beb_begin(), E = beb_end(); I != E; ++I) { + + if (isFirst) { + Out << nl << nl << "Block-level Expressions:" << nl; + isFirst = false; + } + else { Out << nl; } + + Out << " (" << (void*) I.getKey() << ") "; + I.getKey()->printPretty(Out); + Out << " : "; + I.getData().print(Out); + } + + // Print equality constraints. 
+ + if (!ConstEq.isEmpty()) { + + Out << nl << sep << "'==' constraints:"; + + for (ConstEqTy::iterator I = ConstEq.begin(), + E = ConstEq.end(); I!=E; ++I) { + + Out << nl << " $" << I.getKey() + << " : " << I.getData()->toString(); + } + } + + // Print != constraints. + + if (!ConstNotEq.isEmpty()) { + + Out << nl << sep << "'!=' constraints:"; + + for (ConstNotEqTy::iterator I = ConstNotEq.begin(), + EI = ConstNotEq.end(); I != EI; ++I) { + + Out << nl << " $" << I.getKey() << " : "; + isFirst = true; + + IntSetTy::iterator J = I.getData().begin(), EJ = I.getData().end(); + + for ( ; J != EJ; ++J) { + if (isFirst) isFirst = false; + else Out << ", "; + + Out << (*J)->toString(); + } + } + } + + // Print checker-specific data. + + if (P && CheckerState) + P->PrintCheckerState(Out, CheckerState, nl, sep); +} diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp new file mode 100644 index 00000000000..f62b8f126cd --- /dev/null +++ b/clang/lib/Basic/Diagnostic.cpp @@ -0,0 +1,232 @@ +//===--- Diagnostic.cpp - C Language Family Diagnostic Handling -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Diagnostic-related interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include <cassert> +#include <vector> +#include <map> +#include <cstring> +using namespace clang; + +//===----------------------------------------------------------------------===// +// Builtin Diagnostic information +//===----------------------------------------------------------------------===// + +/// Flag values for diagnostics. +enum { + // Diagnostic classes. 
+ NOTE = 0x01, + WARNING = 0x02, + EXTENSION = 0x03, + ERROR = 0x04, + class_mask = 0x07 +}; + +/// DiagnosticFlags - A set of flags, or'd together, that describe the +/// diagnostic. +static unsigned char DiagnosticFlags[] = { +#define DIAG(ENUM,FLAGS,DESC) FLAGS, +#include "clang/Basic/DiagnosticKinds.def" + 0 +}; + +/// getDiagClass - Return the class field of the diagnostic. +/// +static unsigned getBuiltinDiagClass(unsigned DiagID) { + assert(DiagID < diag::NUM_BUILTIN_DIAGNOSTICS && + "Diagnostic ID out of range!"); + return DiagnosticFlags[DiagID] & class_mask; +} + +/// DiagnosticText - An english message to print for the diagnostic. These +/// should be localized. +static const char * const DiagnosticText[] = { +#define DIAG(ENUM,FLAGS,DESC) DESC, +#include "clang/Basic/DiagnosticKinds.def" + 0 +}; + +//===----------------------------------------------------------------------===// +// Custom Diagnostic information +//===----------------------------------------------------------------------===// + +namespace clang { + namespace diag { + class CustomDiagInfo { + typedef std::pair<Diagnostic::Level, std::string> DiagDesc; + std::vector<DiagDesc> DiagInfo; + std::map<DiagDesc, unsigned> DiagIDs; + public: + + /// getDescription - Return the description of the specified custom + /// diagnostic. + const char *getDescription(unsigned DiagID) const { + assert(this && DiagID-diag::NUM_BUILTIN_DIAGNOSTICS < DiagInfo.size() && + "Invalid diagnosic ID"); + return DiagInfo[DiagID-diag::NUM_BUILTIN_DIAGNOSTICS].second.c_str(); + } + + /// getLevel - Return the level of the specified custom diagnostic. + Diagnostic::Level getLevel(unsigned DiagID) const { + assert(this && DiagID-diag::NUM_BUILTIN_DIAGNOSTICS < DiagInfo.size() && + "Invalid diagnosic ID"); + return DiagInfo[DiagID-diag::NUM_BUILTIN_DIAGNOSTICS].first; + } + + unsigned getOrCreateDiagID(Diagnostic::Level L, const char *Message) { + DiagDesc D(L, Message); + // Check to see if it already exists. 
+ std::map<DiagDesc, unsigned>::iterator I = DiagIDs.lower_bound(D); + if (I != DiagIDs.end() && I->first == D) + return I->second; + + // If not, assign a new ID. + unsigned ID = DiagInfo.size()+diag::NUM_BUILTIN_DIAGNOSTICS; + DiagIDs.insert(std::make_pair(D, ID)); + DiagInfo.push_back(D); + return ID; + } + }; + + } // end diag namespace +} // end clang namespace + + +//===----------------------------------------------------------------------===// +// Common Diagnostic implementation +//===----------------------------------------------------------------------===// + +Diagnostic::Diagnostic(DiagnosticClient &client) : Client(client) { + WarningsAsErrors = false; + WarnOnExtensions = false; + ErrorOnExtensions = false; + // Clear all mappings, setting them to MAP_DEFAULT. + memset(DiagMappings, 0, sizeof(DiagMappings)); + + ErrorOccurred = false; + NumDiagnostics = 0; + NumErrors = 0; + CustomDiagInfo = 0; +} + +Diagnostic::~Diagnostic() { + delete CustomDiagInfo; +} + +/// getCustomDiagID - Return an ID for a diagnostic with the specified message +/// and level. If this is the first request for this diagnosic, it is +/// registered and created, otherwise the existing ID is returned. +unsigned Diagnostic::getCustomDiagID(Level L, const char *Message) { + if (CustomDiagInfo == 0) + CustomDiagInfo = new diag::CustomDiagInfo(); + return CustomDiagInfo->getOrCreateDiagID(L, Message); +} + + +/// isBuiltinNoteWarningOrExtension - Return true if the unmapped diagnostic +/// level of the specified diagnostic ID is a Note, Warning, or Extension. +/// Note that this only works on builtin diagnostics, not custom ones. +bool Diagnostic::isBuiltinNoteWarningOrExtension(unsigned DiagID) { + return DiagID < diag::NUM_BUILTIN_DIAGNOSTICS && + getBuiltinDiagClass(DiagID) < ERROR; +} + + +/// getDescription - Given a diagnostic ID, return a description of the +/// issue. 
+const char *Diagnostic::getDescription(unsigned DiagID) { + if (DiagID < diag::NUM_BUILTIN_DIAGNOSTICS) + return DiagnosticText[DiagID]; + else + return CustomDiagInfo->getDescription(DiagID); +} + +/// getDiagnosticLevel - Based on the way the client configured the Diagnostic +/// object, classify the specified diagnostic ID into a Level, consumable by +/// the DiagnosticClient. +Diagnostic::Level Diagnostic::getDiagnosticLevel(unsigned DiagID) const { + // Handle custom diagnostics, which cannot be mapped. + if (DiagID >= diag::NUM_BUILTIN_DIAGNOSTICS) + return CustomDiagInfo->getLevel(DiagID); + + unsigned DiagClass = getBuiltinDiagClass(DiagID); + + // Specific non-error diagnostics may be mapped to various levels from ignored + // to error. + if (DiagClass < ERROR) { + switch (getDiagnosticMapping((diag::kind)DiagID)) { + case diag::MAP_DEFAULT: break; + case diag::MAP_IGNORE: return Ignored; + case diag::MAP_WARNING: DiagClass = WARNING; break; + case diag::MAP_ERROR: DiagClass = ERROR; break; + } + } + + // Map diagnostic classes based on command line argument settings. + if (DiagClass == EXTENSION) { + if (ErrorOnExtensions) + DiagClass = ERROR; + else if (WarnOnExtensions) + DiagClass = WARNING; + else + return Ignored; + } + + // If warnings are to be treated as errors, indicate this as such. + if (DiagClass == WARNING && WarningsAsErrors) + DiagClass = ERROR; + + switch (DiagClass) { + default: assert(0 && "Unknown diagnostic class!"); + case NOTE: return Diagnostic::Note; + case WARNING: return Diagnostic::Warning; + case ERROR: return Diagnostic::Error; + } +} + +/// Report - Issue the message to the client. If the client wants us to stop +/// compilation, return true, otherwise return false. DiagID is a member of +/// the diag::kind enum. +void Diagnostic::Report(FullSourceLoc Pos, unsigned DiagID, + const std::string *Strs, unsigned NumStrs, + const SourceRange *Ranges, unsigned NumRanges) { + + // Figure out the diagnostic level of this message. 
+ Diagnostic::Level DiagLevel = getDiagnosticLevel(DiagID); + + // If the client doesn't care about this message, don't issue it. + if (DiagLevel == Diagnostic::Ignored) + return; + + // If this is not an error and we are in a system header, ignore it. We have + // to check on the original class here, because we also want to ignore + // extensions and warnings in -Werror and -pedantic-errors modes, which *map* + // warnings/extensions to errors. + if (DiagID < diag::NUM_BUILTIN_DIAGNOSTICS && + getBuiltinDiagClass(DiagID) != ERROR && + Client.isInSystemHeader(Pos)) + return; + + if (DiagLevel >= Diagnostic::Error) { + ErrorOccurred = true; + ++NumErrors; + } + + // Finally, report it. + Client.HandleDiagnostic(*this, DiagLevel, Pos, (diag::kind)DiagID, + Strs, NumStrs, Ranges, NumRanges); + ++NumDiagnostics; +} + +DiagnosticClient::~DiagnosticClient() {} diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp new file mode 100644 index 00000000000..cfc08ed084b --- /dev/null +++ b/clang/lib/Basic/FileManager.cpp @@ -0,0 +1,275 @@ +///===--- FileManager.cpp - File System Probing and Caching ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the FileManager interface. +// +//===----------------------------------------------------------------------===// +// +// TODO: This should index all interesting directories with dirent calls. +// getdirentries ? +// opendir/readdir_r/closedir ? 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" +#include "llvm/Support/Streams.h" +#include "llvm/Config/config.h" +using namespace clang; + +// FIXME: Enhance libsystem to support inode and other fields. +#include <sys/stat.h> + +#if defined(_MSC_VER) +#define S_ISDIR(s) (_S_IFDIR & s) +#endif + +/// NON_EXISTENT_DIR - A special value distinct from null that is used to +/// represent a dir name that doesn't exist on the disk. +#define NON_EXISTENT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1) + +#ifdef LLVM_ON_WIN32 + +#define IS_DIR_SEPARATOR_CHAR(x) ((x) == '/' || (x) == '\\') + +namespace { + static std::string GetFullPath(const char *relPath) + { + char *absPathStrPtr = _fullpath(NULL, relPath, 0); + assert(absPathStrPtr && "_fullpath() returned NULL!"); + + std::string absPath(absPathStrPtr); + + free(absPathStrPtr); + return absPath; + } +} + +class FileManager::UniqueDirContainer { + /// UniqueDirs - Cache from full path to existing directories/files. + /// + llvm::StringMap<DirectoryEntry> UniqueDirs; + +public: + DirectoryEntry &getDirectory(const char *Name, struct stat &StatBuf) { + std::string FullPath(GetFullPath(Name)); + return UniqueDirs.GetOrCreateValue( + FullPath.c_str(), + FullPath.c_str() + FullPath.size() + ).getValue(); + } + + size_t size() { return UniqueDirs.size(); } +}; + +class FileManager::UniqueFileContainer { + /// UniqueFiles - Cache from full path to existing directories/files. 
+ /// + llvm::StringMap<FileEntry> UniqueFiles; + +public: + FileEntry &getFile(const char *Name, struct stat &StatBuf) { + std::string FullPath(GetFullPath(Name)); + return UniqueFiles.GetOrCreateValue( + FullPath.c_str(), + FullPath.c_str() + FullPath.size() + ).getValue(); + } + + size_t size() { return UniqueFiles.size(); } +}; + +#else + +#define IS_DIR_SEPARATOR_CHAR(x) ((x) == '/') + +class FileManager::UniqueDirContainer { + /// UniqueDirs - Cache from ID's to existing directories/files. + /// + std::map<std::pair<dev_t, ino_t>, DirectoryEntry> UniqueDirs; + +public: + DirectoryEntry &getDirectory(const char *Name, struct stat &StatBuf) { + return UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)]; + } + + size_t size() { return UniqueDirs.size(); } +}; + +class FileManager::UniqueFileContainer { + /// UniqueFiles - Cache from ID's to existing directories/files. + /// + std::set<FileEntry> UniqueFiles; + +public: + FileEntry &getFile(const char *Name, struct stat &StatBuf) { + return + const_cast<FileEntry&>( + *UniqueFiles.insert(FileEntry(StatBuf.st_dev, + StatBuf.st_ino)).first); + } + + size_t size() { return UniqueFiles.size(); } +}; + +#endif + + +FileManager::FileManager() : UniqueDirs(*new UniqueDirContainer), + UniqueFiles(*new UniqueFileContainer), + DirEntries(64), FileEntries(64), NextFileUID(0) +{ + NumDirLookups = NumFileLookups = 0; + NumDirCacheMisses = NumFileCacheMisses = 0; +} + +FileManager::~FileManager() { + delete &UniqueDirs; + delete &UniqueFiles; +} + + +/// getDirectory - Lookup, cache, and verify the specified directory. This +/// returns null if the directory doesn't exist. +/// +const DirectoryEntry *FileManager::getDirectory(const char *NameStart, + const char *NameEnd) { + ++NumDirLookups; + llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt = + DirEntries.GetOrCreateValue(NameStart, NameEnd); + + // See if there is already an entry in the map. 
+ if (NamedDirEnt.getValue()) + return NamedDirEnt.getValue() == NON_EXISTENT_DIR + ? 0 : NamedDirEnt.getValue(); + + ++NumDirCacheMisses; + + // By default, initialize it to invalid. + NamedDirEnt.setValue(NON_EXISTENT_DIR); + + // Get the null-terminated directory name as stored as the key of the + // DirEntries map. + const char *InterndDirName = NamedDirEnt.getKeyData(); + + // Check to see if the directory exists. + struct stat StatBuf; + if (stat(InterndDirName, &StatBuf) || // Error stat'ing. + !S_ISDIR(StatBuf.st_mode)) // Not a directory? + return 0; + + // It exists. See if we have already opened a directory with the same inode. + // This occurs when one dir is symlinked to another, for example. + DirectoryEntry &UDE = UniqueDirs.getDirectory(InterndDirName, StatBuf); + + NamedDirEnt.setValue(&UDE); + if (UDE.getName()) // Already have an entry with this inode, return it. + return &UDE; + + // Otherwise, we don't have this directory yet, add it. We use the string + // key from the DirEntries map as the string. + UDE.Name = InterndDirName; + return &UDE; +} + +/// NON_EXISTENT_FILE - A special value distinct from null that is used to +/// represent a filename that doesn't exist on the disk. +#define NON_EXISTENT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1) + +/// getFile - Lookup, cache, and verify the specified file. This returns null +/// if the file doesn't exist. +/// +const FileEntry *FileManager::getFile(const char *NameStart, + const char *NameEnd) { + ++NumFileLookups; + + // See if there is already an entry in the map. + llvm::StringMapEntry<FileEntry *> &NamedFileEnt = + FileEntries.GetOrCreateValue(NameStart, NameEnd); + + // See if there is already an entry in the map. + if (NamedFileEnt.getValue()) + return NamedFileEnt.getValue() == NON_EXISTENT_FILE + ? 0 : NamedFileEnt.getValue(); + + ++NumFileCacheMisses; + + // By default, initialize it to invalid. + NamedFileEnt.setValue(NON_EXISTENT_FILE); + + // Figure out what directory it is in. 
If the string contains a / in it, + // strip off everything after it. + // FIXME: this logic should be in sys::Path. + const char *SlashPos = NameEnd-1; + while (SlashPos >= NameStart && !IS_DIR_SEPARATOR_CHAR(SlashPos[0])) + --SlashPos; + + const DirectoryEntry *DirInfo; + if (SlashPos < NameStart) { + // Use the current directory if file has no path component. + const char *Name = "."; + DirInfo = getDirectory(Name, Name+1); + } else if (SlashPos == NameEnd-1) + return 0; // If filename ends with a /, it's a directory. + else + DirInfo = getDirectory(NameStart, SlashPos); + + if (DirInfo == 0) // Directory doesn't exist, file can't exist. + return 0; + + // Get the null-terminated file name as stored as the key of the + // FileEntries map. + const char *InterndFileName = NamedFileEnt.getKeyData(); + + // FIXME: Use the directory info to prune this, before doing the stat syscall. + // FIXME: This will reduce the # syscalls. + + // Nope, there isn't. Check to see if the file exists. + struct stat StatBuf; + //llvm::cerr << "STATING: " << Filename; + if (stat(InterndFileName, &StatBuf) || // Error stat'ing. + S_ISDIR(StatBuf.st_mode)) { // A directory? + // If this file doesn't exist, we leave a null in FileEntries for this path. + //llvm::cerr << ": Not existing\n"; + return 0; + } + //llvm::cerr << ": exists\n"; + + // It exists. See if we have already opened a file with the same inode. + // This occurs when one dir is symlinked to another, for example. + FileEntry &UFE = UniqueFiles.getFile(InterndFileName, StatBuf); + + NamedFileEnt.setValue(&UFE); + if (UFE.getName()) // Already have an entry with this inode, return it. + return &UFE; + + // Otherwise, we don't have this directory yet, add it. + // FIXME: Change the name to be a char* that points back to the 'FileEntries' + // key. 
+ UFE.Name = InterndFileName; + UFE.Size = StatBuf.st_size; + UFE.ModTime = StatBuf.st_mtime; + UFE.Dir = DirInfo; + UFE.UID = NextFileUID++; + return &UFE; +} + +void FileManager::PrintStats() const { + llvm::cerr << "\n*** File Manager Stats:\n"; + llvm::cerr << UniqueFiles.size() << " files found, " + << UniqueDirs.size() << " dirs found.\n"; + llvm::cerr << NumDirLookups << " dir lookups, " + << NumDirCacheMisses << " dir cache misses.\n"; + llvm::cerr << NumFileLookups << " file lookups, " + << NumFileCacheMisses << " file cache misses.\n"; + + //llvm::cerr << PagesMapped << BytesOfPagesMapped << FSLookups; +} diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp new file mode 100644 index 00000000000..65e984a0f78 --- /dev/null +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -0,0 +1,551 @@ +//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IdentifierInfo, IdentifierVisitor, and +// IdentifierTable interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; + +//===----------------------------------------------------------------------===// +// IdentifierInfo Implementation +//===----------------------------------------------------------------------===// + +IdentifierInfo::IdentifierInfo() { + TokenID = tok::identifier; + ObjCID = tok::objc_not_keyword; + BuiltinID = 0; + HasMacro = false; + IsExtension = false; + IsPoisoned = false; + IsCPPOperatorKeyword = false; + FETokenInfo = 0; +} + +//===----------------------------------------------------------------------===// +// IdentifierTable Implementation +//===----------------------------------------------------------------------===// + +IdentifierTable::IdentifierTable(const LangOptions &LangOpts) + // Start with space for 8K identifiers. + : HashTable(8192) { + + // Populate the identifier table with info about keywords for the current + // language. + AddKeywords(LangOpts); +} + +// This cstor is intended to be used only for serialization. +IdentifierTable::IdentifierTable() : HashTable(8192) {} + +//===----------------------------------------------------------------------===// +// Language Keyword Implementation +//===----------------------------------------------------------------------===// + +/// AddKeyword - This method is used to associate a token ID with specific +/// identifiers because they are language keywords. This causes the lexer to +/// automatically map matching identifiers to specialized token codes. 
+/// +/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be +/// enabled in the specified langauge, set to 1 if it is an extension +/// in the specified language, and set to 2 if disabled in the +/// specified language. +static void AddKeyword(const char *Keyword, unsigned KWLen, + tok::TokenKind TokenCode, + int C90, int C99, int CXX, int CXX0x, int BoolSupport, + const LangOptions &LangOpts, IdentifierTable &Table) { + int Flags = 0; + if (BoolSupport != 0) { + Flags = LangOpts.Boolean ? BoolSupport : 2; + } else if (LangOpts.CPlusPlus) { + Flags = LangOpts.CPlusPlus0x ? CXX0x : CXX; + } else if (LangOpts.C99) { + Flags = C99; + } else { + Flags = C90; + } + + // Don't add this keyword if disabled in this language or if an extension + // and extensions are disabled. + if (Flags + LangOpts.NoExtensions >= 2) return; + + IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen); + Info.setTokenID(TokenCode); + Info.setIsExtensionToken(Flags == 1); +} + +static void AddAlias(const char *Keyword, unsigned KWLen, + tok::TokenKind AliaseeID, + const char *AliaseeKeyword, unsigned AliaseeKWLen, + const LangOptions &LangOpts, IdentifierTable &Table) { + IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen); + IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword, + AliaseeKeyword+AliaseeKWLen); + AliasInfo.setTokenID(AliaseeID); + AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken()); +} + +/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative +/// representations. +static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen, + tok::TokenKind TokenCode, + IdentifierTable &Table) { + IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen); + Info.setTokenID(TokenCode); + Info.setIsCPlusPlusOperatorKeyword(); +} + +/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or +/// "property". 
+static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID, + const char *Name, unsigned NameLen, + IdentifierTable &Table) { + Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID); +} + +/// AddKeywords - Add all keywords to the symbol table. +/// +void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { + enum { + C90Shift = 0, + EXTC90 = 1 << C90Shift, + NOTC90 = 2 << C90Shift, + C99Shift = 2, + EXTC99 = 1 << C99Shift, + NOTC99 = 2 << C99Shift, + CPPShift = 4, + EXTCPP = 1 << CPPShift, + NOTCPP = 2 << CPPShift, + CPP0xShift = 6, + EXTCPP0x = 1 << CPP0xShift, + NOTCPP0x = 2 << CPP0xShift, + BoolShift = 8, + BOOLSUPPORT = 1 << BoolShift, + Mask = 3 + }; + + // Add keywords and tokens for the current language. +#define KEYWORD(NAME, FLAGS) \ + AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \ + ((FLAGS) >> C90Shift) & Mask, \ + ((FLAGS) >> C99Shift) & Mask, \ + ((FLAGS) >> CPPShift) & Mask, \ + ((FLAGS) >> CPP0xShift) & Mask, \ + ((FLAGS) >> BoolShift) & Mask, LangOpts, *this); +#define ALIAS(NAME, TOK) \ + AddAlias(NAME, strlen(NAME), tok::kw_ ## TOK, #TOK, strlen(#TOK), \ + LangOpts, *this); +#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ + if (LangOpts.CXXOperatorNames) \ + AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this); +#define OBJC1_AT_KEYWORD(NAME) \ + if (LangOpts.ObjC1) \ + AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); +#define OBJC2_AT_KEYWORD(NAME) \ + if (LangOpts.ObjC2) \ + AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); +#include "clang/Basic/TokenKinds.def" +} + +tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { + // We use a perfect hash function here involving the length of the keyword, + // the first and third character. For preprocessor ID's there are no + // collisions (if there were, the switch below would complain about duplicate + // case values). Note that this depends on 'if' being null terminated. 
+ +#define HASH(LEN, FIRST, THIRD) \ + (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) +#define CASE(LEN, FIRST, THIRD, NAME) \ + case HASH(LEN, FIRST, THIRD): \ + return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME + + unsigned Len = getLength(); + if (Len < 2) return tok::pp_not_keyword; + const char *Name = getName(); + switch (HASH(Len, Name[0], Name[2])) { + default: return tok::pp_not_keyword; + CASE( 2, 'i', '\0', if); + CASE( 4, 'e', 'i', elif); + CASE( 4, 'e', 's', else); + CASE( 4, 'l', 'n', line); + CASE( 4, 's', 'c', sccs); + CASE( 5, 'e', 'd', endif); + CASE( 5, 'e', 'r', error); + CASE( 5, 'i', 'e', ident); + CASE( 5, 'i', 'd', ifdef); + CASE( 5, 'u', 'd', undef); + + CASE( 6, 'a', 's', assert); + CASE( 6, 'd', 'f', define); + CASE( 6, 'i', 'n', ifndef); + CASE( 6, 'i', 'p', import); + CASE( 6, 'p', 'a', pragma); + + CASE( 7, 'd', 'f', defined); + CASE( 7, 'i', 'c', include); + CASE( 7, 'w', 'r', warning); + + CASE( 8, 'u', 'a', unassert); + CASE(12, 'i', 'c', include_next); +#undef CASE +#undef HASH + } +} + +//===----------------------------------------------------------------------===// +// Stats Implementation +//===----------------------------------------------------------------------===// + +/// PrintStats - Print statistics about how well the identifier table is doing +/// at hashing identifiers. +void IdentifierTable::PrintStats() const { + unsigned NumBuckets = HashTable.getNumBuckets(); + unsigned NumIdentifiers = HashTable.getNumItems(); + unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; + unsigned AverageIdentifierSize = 0; + unsigned MaxIdentifierLength = 0; + + // TODO: Figure out maximum times an identifier had to probe for -stats. 
+ for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator + I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { + unsigned IdLen = I->getKeyLength(); + AverageIdentifierSize += IdLen; + if (MaxIdentifierLength < IdLen) + MaxIdentifierLength = IdLen; + } + + fprintf(stderr, "\n*** Identifier Table Stats:\n"); + fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); + fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); + fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", + NumIdentifiers/(double)NumBuckets); + fprintf(stderr, "Ave identifier length: %f\n", + (AverageIdentifierSize/(double)NumIdentifiers)); + fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); + + // Compute statistics about the memory allocated for identifiers. + HashTable.getAllocator().PrintStats(); +} + +//===----------------------------------------------------------------------===// +// SelectorTable Implementation +//===----------------------------------------------------------------------===// + +unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) { + return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr()); +} + + +/// MultiKeywordSelector - One of these variable length records is kept for each +/// selector containing more than one keyword. We use a folding set +/// to unique aggregate names (keyword selectors in ObjC parlance). Access to +/// this class is provided strictly through Selector. +namespace clang { +class MultiKeywordSelector : public llvm::FoldingSetNode { + friend SelectorTable* SelectorTable::CreateAndRegister(llvm::Deserializer&); + MultiKeywordSelector(unsigned nKeys) : NumArgs(nKeys) {} +public: + unsigned NumArgs; + + // Constructor for keyword selectors. + MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) { + assert((nKeys > 1) && "not a multi-keyword selector"); + NumArgs = nKeys; + + // Fill in the trailing keyword array. 
+ IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1); + for (unsigned i = 0; i != nKeys; ++i) + KeyInfo[i] = IIV[i]; + } + + // getName - Derive the full selector name and return it. + std::string getName() const; + + unsigned getNumArgs() const { return NumArgs; } + + typedef IdentifierInfo *const *keyword_iterator; + keyword_iterator keyword_begin() const { + return reinterpret_cast<keyword_iterator>(this+1); + } + keyword_iterator keyword_end() const { + return keyword_begin()+NumArgs; + } + IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { + assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index"); + return keyword_begin()[i]; + } + static void Profile(llvm::FoldingSetNodeID &ID, + keyword_iterator ArgTys, unsigned NumArgs) { + ID.AddInteger(NumArgs); + for (unsigned i = 0; i != NumArgs; ++i) + ID.AddPointer(ArgTys[i]); + } + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, keyword_begin(), NumArgs); + } +}; +} // end namespace clang. + +unsigned Selector::getNumArgs() const { + unsigned IIF = getIdentifierInfoFlag(); + if (IIF == ZeroArg) + return 0; + if (IIF == OneArg) + return 1; + // We point to a MultiKeywordSelector (pointer doesn't contain any flags). + MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); + return SI->getNumArgs(); +} + +IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { + if (IdentifierInfo *II = getAsIdentifierInfo()) { + assert(argIndex == 0 && "illegal keyword index"); + return II; + } + // We point to a MultiKeywordSelector (pointer doesn't contain any flags). 
+ MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); + return SI->getIdentifierInfoForSlot(argIndex); +} + +std::string MultiKeywordSelector::getName() const { + std::string Result; + unsigned Length = 0; + for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { + if (*I) + Length += (*I)->getLength(); + ++Length; // : + } + + Result.reserve(Length); + + for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { + if (*I) + Result.insert(Result.end(), (*I)->getName(), + (*I)->getName()+(*I)->getLength()); + Result.push_back(':'); + } + + return Result; +} + +std::string Selector::getName() const { + if (IdentifierInfo *II = getAsIdentifierInfo()) { + if (getNumArgs() == 0) + return II->getName(); + + std::string Res = II->getName(); + Res += ":"; + return Res; + } + + // We have a multiple keyword selector (no embedded flags). + return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName(); +} + + +Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { + if (nKeys < 2) + return Selector(IIV[0], nKeys); + + llvm::FoldingSet<MultiKeywordSelector> *SelTab; + + SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl); + + // Unique selector, to guarantee there is one per name. + llvm::FoldingSetNodeID ID; + MultiKeywordSelector::Profile(ID, IIV, nKeys); + + void *InsertPos = 0; + if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos)) + return Selector(SI); + + // MultiKeywordSelector objects are not allocated with new because they have a + // variable size array (for parameter types) at the end of them. 
+ MultiKeywordSelector *SI = + (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) + + nKeys*sizeof(IdentifierInfo *)); + new (SI) MultiKeywordSelector(nKeys, IIV); + SelTab->InsertNode(SI, InsertPos); + return Selector(SI); +} + +SelectorTable::SelectorTable() { + Impl = new llvm::FoldingSet<MultiKeywordSelector>; +} + +SelectorTable::~SelectorTable() { + delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl); +} + +//===----------------------------------------------------------------------===// +// Serialization for IdentifierInfo and IdentifierTable. +//===----------------------------------------------------------------------===// + +void IdentifierInfo::Emit(llvm::Serializer& S) const { + S.EmitInt(getTokenID()); + S.EmitInt(getBuiltinID()); + S.EmitInt(getObjCKeywordID()); + S.EmitBool(hasMacroDefinition()); + S.EmitBool(isExtensionToken()); + S.EmitBool(isPoisoned()); + S.EmitBool(isCPlusPlusOperatorKeyword()); + // FIXME: FETokenInfo +} + +void IdentifierInfo::Read(llvm::Deserializer& D) { + setTokenID((tok::TokenKind) D.ReadInt()); + setBuiltinID(D.ReadInt()); + setObjCKeywordID((tok::ObjCKeywordKind) D.ReadInt()); + setHasMacroDefinition(D.ReadBool()); + setIsExtensionToken(D.ReadBool()); + setIsPoisoned(D.ReadBool()); + setIsCPlusPlusOperatorKeyword(D.ReadBool()); + // FIXME: FETokenInfo +} + +void IdentifierTable::Emit(llvm::Serializer& S) const { + S.EnterBlock(); + + S.EmitPtr(this); + + for (iterator I=begin(), E=end(); I != E; ++I) { + const char* Key = I->getKeyData(); + const IdentifierInfo* Info = &I->getValue(); + + bool KeyRegistered = S.isRegistered(Key); + bool InfoRegistered = S.isRegistered(Info); + + if (KeyRegistered || InfoRegistered) { + // These acrobatics are so that we don't incur the cost of registering + // a pointer with the backpatcher during deserialization if nobody + // references the object. + S.EmitPtr(InfoRegistered ? Info : NULL); + S.EmitPtr(KeyRegistered ? 
Key : NULL); + S.EmitCStr(Key); + S.Emit(*Info); + } + } + + S.ExitBlock(); +} + +IdentifierTable* IdentifierTable::CreateAndRegister(llvm::Deserializer& D) { + llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); + + std::vector<char> buff; + buff.reserve(200); + + IdentifierTable* t = new IdentifierTable(); + D.RegisterPtr(t); + + while (!D.FinishedBlock(BLoc)) { + llvm::SerializedPtrID InfoPtrID = D.ReadPtrID(); + llvm::SerializedPtrID KeyPtrID = D.ReadPtrID(); + + D.ReadCStr(buff); + + llvm::StringMapEntry<IdentifierInfo>& Entry = + t->HashTable.GetOrCreateValue(&buff[0],&buff[0]+buff.size()); + + D.Read(Entry.getValue()); + + if (InfoPtrID) + D.RegisterRef(InfoPtrID,Entry.getValue()); + + if (KeyPtrID) + D.RegisterPtr(KeyPtrID,Entry.getKeyData()); + } + + return t; +} + +//===----------------------------------------------------------------------===// +// Serialization for Selector and SelectorTable. +//===----------------------------------------------------------------------===// + +void Selector::Emit(llvm::Serializer& S) const { + S.EmitInt(getIdentifierInfoFlag()); + S.EmitPtr(reinterpret_cast<void*>(InfoPtr & ~ArgFlags)); +} + +Selector Selector::ReadVal(llvm::Deserializer& D) { + unsigned flag = D.ReadInt(); + + uintptr_t ptr; + D.ReadUIntPtr(ptr,false); // No backpatching. + + return Selector(ptr | flag); +} + +void SelectorTable::Emit(llvm::Serializer& S) const { + typedef llvm::FoldingSet<MultiKeywordSelector>::iterator iterator; + llvm::FoldingSet<MultiKeywordSelector> *SelTab; + SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl); + + S.EnterBlock(); + + S.EmitPtr(this); + + for (iterator I=SelTab->begin(), E=SelTab->end(); I != E; ++I) { + if (!S.isRegistered(&*I)) + continue; + + S.FlushRecord(); // Start a new record. 
+ + S.EmitPtr(&*I); + S.EmitInt(I->getNumArgs()); + + for (MultiKeywordSelector::keyword_iterator KI = I->keyword_begin(), + KE = I->keyword_end(); KI != KE; ++KI) + S.EmitPtr(*KI); + } + + S.ExitBlock(); +} + +SelectorTable* SelectorTable::CreateAndRegister(llvm::Deserializer& D) { + llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); + + SelectorTable* t = new SelectorTable(); + D.RegisterPtr(t); + + llvm::FoldingSet<MultiKeywordSelector>& SelTab = + *static_cast<llvm::FoldingSet<MultiKeywordSelector>*>(t->Impl); + + while (!D.FinishedBlock(BLoc)) { + + llvm::SerializedPtrID PtrID = D.ReadPtrID(); + unsigned nKeys = D.ReadInt(); + + MultiKeywordSelector *SI = + (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) + + nKeys*sizeof(IdentifierInfo *)); + + new (SI) MultiKeywordSelector(nKeys); + + D.RegisterPtr(PtrID,SI); + + IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(SI+1); + + for (unsigned i = 0; i != nKeys; ++i) + D.ReadPtr(KeyInfo[i],false); + + SelTab.GetOrInsertNode(SI); + } + + return t; +} diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp new file mode 100644 index 00000000000..f7fd91fbda5 --- /dev/null +++ b/clang/lib/Basic/LangOptions.cpp @@ -0,0 +1,58 @@ +//===--- LangOptions.cpp - Language feature info --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods for LangOptions. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/LangOptions.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; + +void LangOptions::Emit(llvm::Serializer& S) const { + S.EmitBool((bool) Trigraphs); + S.EmitBool((bool) BCPLComment); + S.EmitBool((bool) DollarIdents); + S.EmitBool((bool) Digraphs); + S.EmitBool((bool) HexFloats); + S.EmitBool((bool) C99); + S.EmitBool((bool) Microsoft); + S.EmitBool((bool) CPlusPlus); + S.EmitBool((bool) CPlusPlus0x); + S.EmitBool((bool) NoExtensions); + S.EmitBool((bool) CXXOperatorNames); + S.EmitBool((bool) ObjC1); + S.EmitBool((bool) ObjC2); + S.EmitBool((bool) PascalStrings); + S.EmitBool((bool) Boolean); + S.EmitBool((bool) WritableStrings); + S.EmitBool((bool) LaxVectorConversions); +} + +void LangOptions::Read(llvm::Deserializer& D) { + Trigraphs = D.ReadBool() ? 1 : 0; + BCPLComment = D.ReadBool() ? 1 : 0; + DollarIdents = D.ReadBool() ? 1 : 0; + Digraphs = D.ReadBool() ? 1 : 0; + HexFloats = D.ReadBool() ? 1 : 0; + C99 = D.ReadBool() ? 1 : 0; + Microsoft = D.ReadBool() ? 1 : 0; + CPlusPlus = D.ReadBool() ? 1 : 0; + CPlusPlus0x = D.ReadBool() ? 1 : 0; + NoExtensions = D.ReadBool() ? 1 : 0; + CXXOperatorNames = D.ReadBool() ? 1 : 0; + ObjC1 = D.ReadBool() ? 1 : 0; + ObjC2 = D.ReadBool() ? 1 : 0; + PascalStrings = D.ReadBool() ? 1 : 0; + Boolean = D.ReadBool() ? 1 : 0; + WritableStrings = D.ReadBool() ? 1 : 0; + LaxVectorConversions = D.ReadBool() ? 1 : 0; +} diff --git a/clang/lib/Basic/Makefile b/clang/lib/Basic/Makefile new file mode 100644 index 00000000000..e95d6dbfa35 --- /dev/null +++ b/clang/lib/Basic/Makefile @@ -0,0 +1,22 @@ +##===- clang/lib/Basic/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## +# +# This implements the Basic library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME := clangBasic +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp new file mode 100644 index 00000000000..eaf129f251e --- /dev/null +++ b/clang/lib/Basic/SourceLocation.cpp @@ -0,0 +1,79 @@ +//==--- SourceLocation.cpp - Compact identifier for Source Files -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines serialization methods for the SourceLocation class. +// This file defines accessor methods for the FullSourceLoc class. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" + +using namespace clang; + +void SourceLocation::Emit(llvm::Serializer& S) const { + S.EmitInt(getRawEncoding()); +} + +SourceLocation SourceLocation::ReadVal(llvm::Deserializer& D) { + return SourceLocation::getFromRawEncoding(D.ReadInt()); +} + +void SourceRange::Emit(llvm::Serializer& S) const { + B.Emit(S); + E.Emit(S); +} + +SourceRange SourceRange::ReadVal(llvm::Deserializer& D) { + SourceLocation A = SourceLocation::ReadVal(D); + SourceLocation B = SourceLocation::ReadVal(D); + return SourceRange(A,B); +} + +FullSourceLoc FullSourceLoc::getLogicalLoc() { + assert (isValid()); + return FullSourceLoc(SrcMgr->getLogicalLoc(Loc),*SrcMgr); +} + +FullSourceLoc FullSourceLoc::getIncludeLoc() { + assert (isValid()); + return FullSourceLoc(SrcMgr->getIncludeLoc(Loc),*SrcMgr); +} + +unsigned FullSourceLoc::getLineNumber() { + assert (isValid()); + return SrcMgr->getLineNumber(Loc); +} + +unsigned FullSourceLoc::getColumnNumber() { + assert (isValid()); + return SrcMgr->getColumnNumber(Loc); +} + +const char* FullSourceLoc::getSourceName() const { + assert (isValid()); + return SrcMgr->getSourceName(Loc); +} + +const FileEntry* FullSourceLoc::getFileEntryForLoc() const { + assert (isValid()); + return SrcMgr->getFileEntryForLoc(Loc); +} + +const char * FullSourceLoc::getCharacterData() const { + assert (isValid()); + return SrcMgr->getCharacterData(Loc); +} + +const llvm::MemoryBuffer* FullSourceLoc::getBuffer() const { + assert (isValid()); + return SrcMgr->getBuffer(Loc.getFileID()); +} diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp new file mode 100644 index 00000000000..73ac2abe26f --- /dev/null +++ b/clang/lib/Basic/SourceManager.cpp @@ -0,0 +1,574 @@ +//===--- SourceManager.cpp - Track 
and cache source files -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceManager interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/System/Path.h" +#include "llvm/Bitcode/Serialize.h" +#include "llvm/Bitcode/Deserialize.h" +#include "llvm/Support/Streams.h" +#include <algorithm> +#include <fcntl.h> +using namespace clang; +using namespace SrcMgr; +using llvm::MemoryBuffer; + +ContentCache::~ContentCache() { + delete Buffer; + delete [] SourceLineCache; +} + +// FIXME: REMOVE THESE +#include <unistd.h> +#include <sys/types.h> +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <sys/uio.h> +#include <sys/fcntl.h> +#else +#include <io.h> +#endif +#include <cerrno> + +static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) { +#if 0 + // FIXME: Reintroduce this and zap this function once the common llvm stuff + // is fast for the small case. + return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), + FileEnt->getSize()); +#endif + + // If the file is larger than some threshold, use 'read', otherwise use mmap. 
+ if (FileEnt->getSize() >= 4096*4) + return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), + 0, FileEnt->getSize()); + + MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(), + FileEnt->getName()); + char *BufPtr = const_cast<char*>(SB->getBufferStart()); + +#if defined(LLVM_ON_WIN32) + int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY); +#else + int FD = ::open(FileEnt->getName(), O_RDONLY); +#endif + if (FD == -1) { + delete SB; + return 0; + } + + unsigned BytesLeft = FileEnt->getSize(); + while (BytesLeft) { + ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); + if (NumRead != -1) { + BytesLeft -= NumRead; + BufPtr += NumRead; + } else if (errno == EINTR) { + // try again + } else { + // error reading. + close(FD); + delete SB; + return 0; + } + } + close(FD); + + return SB; +} + + +/// getFileInfo - Create or return a cached FileInfo for the specified file. +/// +const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { + + assert(FileEnt && "Didn't specify a file entry to use?"); + // Do we already have information about this file? + std::set<ContentCache>::iterator I = + FileInfos.lower_bound(ContentCache(FileEnt)); + + if (I != FileInfos.end() && I->Entry == FileEnt) + return &*I; + + // Nope, get information. + const MemoryBuffer *File = ReadFileFast(FileEnt); + if (File == 0) + return 0; + + ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); + + Entry.Buffer = File; + Entry.SourceLineCache = 0; + Entry.NumLines = 0; + return &Entry; +} + + +/// createMemBufferContentCache - Create a new ContentCache for the specified +/// memory buffer. This does no caching. +const ContentCache* +SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { + // Add a new ContentCache to the MemBufferInfos list and return it. 
We + // must default construct the object first that the instance actually + // stored within MemBufferInfos actually owns the Buffer, and not any + // temporary we would use in the call to "push_back". + MemBufferInfos.push_back(ContentCache()); + ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); + Entry.Buffer = Buffer; + return &Entry; +} + + +/// createFileID - Create a new fileID for the specified ContentCache and +/// include position. This works regardless of whether the ContentCache +/// corresponds to a file or some other input source. +unsigned SourceManager::createFileID(const ContentCache *File, + SourceLocation IncludePos) { + // If FileEnt is really large (e.g. it's a large .i file), we may not be able + // to fit an arbitrary position in the file in the FilePos field. To handle + // this, we create one FileID for each chunk of the file that fits in a + // FilePos field. + unsigned FileSize = File->Buffer->getBufferSize(); + if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { + FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File)); + assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && + "Ran out of file ID's!"); + return FileIDs.size(); + } + + // Create one FileID for each chunk of the file. + unsigned Result = FileIDs.size()+1; + + unsigned ChunkNo = 0; + while (1) { + FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File)); + + if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; + FileSize -= (1 << SourceLocation::FilePosBits); + } + + assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && + "Ran out of file ID's!"); + return Result; +} + +/// getInstantiationLoc - Return a new SourceLocation that encodes the fact +/// that a token from physloc PhysLoc should actually be referenced from +/// InstantiationLoc. 
+SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, + SourceLocation InstantLoc) { + // The specified source location may be a mapped location, due to a macro + // instantiation or #line directive. Strip off this information to find out + // where the characters are actually located. + PhysLoc = getPhysicalLoc(PhysLoc); + + // Resolve InstantLoc down to a real logical location. + InstantLoc = getLogicalLoc(InstantLoc); + + + // If the last macro id is close to the currently requested location, try to + // reuse it. This implements a small cache. + for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ + MacroIDInfo &LastOne = MacroIDs[i]; + + // The instanitation point and source physloc have to exactly match to reuse + // (for now). We could allow "nearby" instantiations in the future. + if (LastOne.getVirtualLoc() != InstantLoc || + LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) + continue; + + // Check to see if the physloc of the token came from near enough to reuse. + int PhysDelta = PhysLoc.getRawFilePos() - + LastOne.getPhysicalLoc().getRawFilePos(); + if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) + return SourceLocation::getMacroLoc(i, PhysDelta); + } + + + MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); + return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); +} + +/// getBufferData - Return a pointer to the start and end of the character +/// data for the specified FileID. +std::pair<const char*, const char*> +SourceManager::getBufferData(unsigned FileID) const { + const llvm::MemoryBuffer *Buf = getBuffer(FileID); + return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); +} + + +/// getCharacterData - Return a pointer to the start of the specified location +/// in the appropriate MemoryBuffer. 
+const char *SourceManager::getCharacterData(SourceLocation SL) const { + // Note that this is a hot function in the getSpelling() path, which is + // heavily used by -E mode. + SL = getPhysicalLoc(SL); + + return getContentCache(SL.getFileID())->Buffer->getBufferStart() + + getFullFilePos(SL); +} + + +/// getColumnNumber - Return the column # for the specified file position. +/// this is significantly cheaper to compute than the line number. This returns +/// zero if the column number isn't known. +unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { + unsigned FileID = Loc.getFileID(); + if (FileID == 0) return 0; + + unsigned FilePos = getFullFilePos(Loc); + const MemoryBuffer *Buffer = getBuffer(FileID); + const char *Buf = Buffer->getBufferStart(); + + unsigned LineStart = FilePos; + while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') + --LineStart; + return FilePos-LineStart+1; +} + +/// getSourceName - This method returns the name of the file or buffer that +/// the SourceLocation specifies. This can be modified with #line directives, +/// etc. +const char *SourceManager::getSourceName(SourceLocation Loc) const { + unsigned FileID = Loc.getFileID(); + if (FileID == 0) return ""; + return getContentCache(FileID)->Buffer->getBufferIdentifier(); +} + +static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; +static void ComputeLineNumbers(ContentCache* FI) { + const MemoryBuffer *Buffer = FI->Buffer; + + // Find the file offsets of all of the *physical* source lines. This does + // not look at trigraphs, escaped newlines, or anything else tricky. + std::vector<unsigned> LineOffsets; + + // Line #1 starts at char 0. + LineOffsets.push_back(0); + + const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); + unsigned Offs = 0; + while (1) { + // Skip over the contents of the line. + // TODO: Vectorize this? 
This is very performance sensitive for programs + // with lots of diagnostics and in -E mode. + const unsigned char *NextBuf = (const unsigned char *)Buf; + while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') + ++NextBuf; + Offs += NextBuf-Buf; + Buf = NextBuf; + + if (Buf[0] == '\n' || Buf[0] == '\r') { + // If this is \n\r or \r\n, skip both characters. + if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) + ++Offs, ++Buf; + ++Offs, ++Buf; + LineOffsets.push_back(Offs); + } else { + // Otherwise, this is a null. If end of file, exit. + if (Buf == End) break; + // Otherwise, skip the null. + ++Offs, ++Buf; + } + } + + // Copy the offsets into the FileInfo structure. + FI->NumLines = LineOffsets.size(); + FI->SourceLineCache = new unsigned[LineOffsets.size()]; + std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); +} + +/// getLineNumber - Given a SourceLocation, return the physical line number +/// for the position indicated. This requires building and caching a table of +/// line offsets for the MemoryBuffer, so this is not cheap: use only when +/// about to emit a diagnostic. +unsigned SourceManager::getLineNumber(SourceLocation Loc) { + unsigned FileID = Loc.getFileID(); + if (FileID == 0) return 0; + + ContentCache* Content; + + if (LastLineNoFileIDQuery == FileID) + Content = LastLineNoContentCache; + else + Content = const_cast<ContentCache*>(getContentCache(FileID)); + + // If this is the first use of line information for this buffer, compute the + /// SourceLineCache for it on demand. + if (Content->SourceLineCache == 0) + ComputeLineNumbers(Content); + + // Okay, we know we have a line number table. Do a binary search to find the + // line number that this character position lands on. 
+ unsigned *SourceLineCache = Content->SourceLineCache; + unsigned *SourceLineCacheStart = SourceLineCache; + unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; + + unsigned QueriedFilePos = getFullFilePos(Loc)+1; + + // If the previous query was to the same file, we know both the file pos from + // that query and the line number returned. This allows us to narrow the + // search space from the entire file to something near the match. + if (LastLineNoFileIDQuery == FileID) { + if (QueriedFilePos >= LastLineNoFilePos) { + SourceLineCache = SourceLineCache+LastLineNoResult-1; + + // The query is likely to be nearby the previous one. Here we check to + // see if it is within 5, 10 or 20 lines. It can be far away in cases + // where big comment blocks and vertical whitespace eat up lines but + // contribute no tokens. + if (SourceLineCache+5 < SourceLineCacheEnd) { + if (SourceLineCache[5] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+5; + else if (SourceLineCache+10 < SourceLineCacheEnd) { + if (SourceLineCache[10] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+10; + else if (SourceLineCache+20 < SourceLineCacheEnd) { + if (SourceLineCache[20] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+20; + } + } + } + } else { + SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; + } + } + + // If the spread is large, do a "radix" test as our initial guess, based on + // the assumption that lines average to approximately the same length. + // NOTE: This is currently disabled, as it does not appear to be profitable in + // initial measurements. + if (0 && SourceLineCacheEnd-SourceLineCache > 20) { + unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; + + // Take a stab at guessing where it is. + unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; + + // Check for -10 and +10 lines. 
+ unsigned LowerBound = std::max(int(ApproxPos-10), 0); + unsigned UpperBound = std::min(ApproxPos+10, FileLen); + + // If the computed lower bound is less than the query location, move it in. + if (SourceLineCache < SourceLineCacheStart+LowerBound && + SourceLineCacheStart[LowerBound] < QueriedFilePos) + SourceLineCache = SourceLineCacheStart+LowerBound; + + // If the computed upper bound is greater than the query location, move it. + if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && + SourceLineCacheStart[UpperBound] >= QueriedFilePos) + SourceLineCacheEnd = SourceLineCacheStart+UpperBound; + } + + unsigned *Pos + = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); + unsigned LineNo = Pos-SourceLineCacheStart; + + LastLineNoFileIDQuery = FileID; + LastLineNoContentCache = Content; + LastLineNoFilePos = QueriedFilePos; + LastLineNoResult = LineNo; + return LineNo; +} + +/// PrintStats - Print statistics to stderr. +/// +void SourceManager::PrintStats() const { + llvm::cerr << "\n*** Source Manager Stats:\n"; + llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() + << " mem buffers mapped, " << FileIDs.size() + << " file ID's allocated.\n"; + llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " + << MacroIDs.size() << " macro expansion FileID's.\n"; + + unsigned NumLineNumsComputed = 0; + unsigned NumFileBytesMapped = 0; + for (std::set<ContentCache>::const_iterator I = + FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { + NumLineNumsComputed += I->SourceLineCache != 0; + NumFileBytesMapped += I->Buffer->getBufferSize(); + } + + llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " + << NumLineNumsComputed << " files with line #'s computed.\n"; +} + +//===----------------------------------------------------------------------===// +// Serialization. 
+//===----------------------------------------------------------------------===// + +void ContentCache::Emit(llvm::Serializer& S) const { + S.FlushRecord(); + S.EmitPtr(this); + + if (Entry) { + llvm::sys::Path Fname(Buffer->getBufferIdentifier()); + + if (Fname.isAbsolute()) + S.EmitCStr(Fname.c_str()); + else { + // Create an absolute path. + // FIXME: This will potentially contain ".." and "." in the path. + llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); + path.appendComponent(Fname.c_str()); + S.EmitCStr(path.c_str()); + } + } + else { + const char* p = Buffer->getBufferStart(); + const char* e = Buffer->getBufferEnd(); + + S.EmitInt(e-p); + + for ( ; p != e; ++p) + S.EmitInt(*p); + } + + S.FlushRecord(); +} + +void ContentCache::ReadToSourceManager(llvm::Deserializer& D, + SourceManager& SMgr, + FileManager* FMgr, + std::vector<char>& Buf) { + if (FMgr) { + llvm::SerializedPtrID PtrID = D.ReadPtrID(); + D.ReadCStr(Buf,false); + + // Create/fetch the FileEntry. + const char* start = &Buf[0]; + const FileEntry* E = FMgr->getFile(start,start+Buf.size()); + + // FIXME: Ideally we want a lazy materialization of the ContentCache + // anyway, because we don't want to read in source files unless this + // is absolutely needed. + if (!E) + D.RegisterPtr(PtrID,NULL); + else + // Get the ContextCache object and register it with the deserializer. + D.RegisterPtr(PtrID,SMgr.getContentCache(E)); + } + else { + // Register the ContextCache object with the deserializer. + SMgr.MemBufferInfos.push_back(ContentCache()); + ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); + D.RegisterPtr(&Entry); + + // Create the buffer. + unsigned Size = D.ReadInt(); + Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); + + // Read the contents of the buffer. 
+ char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); + for (unsigned i = 0; i < Size ; ++i) + p[i] = D.ReadInt(); + } +} + +void FileIDInfo::Emit(llvm::Serializer& S) const { + S.Emit(IncludeLoc); + S.EmitInt(ChunkNo); + S.EmitPtr(Content); +} + +FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { + FileIDInfo I; + I.IncludeLoc = SourceLocation::ReadVal(D); + I.ChunkNo = D.ReadInt(); + D.ReadPtr(I.Content,false); + return I; +} + +void MacroIDInfo::Emit(llvm::Serializer& S) const { + S.Emit(VirtualLoc); + S.Emit(PhysicalLoc); +} + +MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { + MacroIDInfo I; + I.VirtualLoc = SourceLocation::ReadVal(D); + I.PhysicalLoc = SourceLocation::ReadVal(D); + return I; +} + +void SourceManager::Emit(llvm::Serializer& S) const { + S.EnterBlock(); + S.EmitPtr(this); + S.EmitInt(MainFileID); + + // Emit: FileInfos. Just emit the file name. + S.EnterBlock(); + + std::for_each(FileInfos.begin(),FileInfos.end(), + S.MakeEmitter<ContentCache>()); + + S.ExitBlock(); + + // Emit: MemBufferInfos + S.EnterBlock(); + + std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), + S.MakeEmitter<ContentCache>()); + + S.ExitBlock(); + + // Emit: FileIDs + S.EmitInt(FileIDs.size()); + std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); + + // Emit: MacroIDs + S.EmitInt(MacroIDs.size()); + std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); + + S.ExitBlock(); +} + +SourceManager* +SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ + SourceManager *M = new SourceManager(); + D.RegisterPtr(M); + + // Read: the FileID of the main source file of the translation unit. + M->MainFileID = D.ReadInt(); + + std::vector<char> Buf; + + { // Read: FileInfos. + llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); + while (!D.FinishedBlock(BLoc)) + ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); + } + + { // Read: MemBufferInfos. 
+ llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); + while (!D.FinishedBlock(BLoc)) + ContentCache::ReadToSourceManager(D,*M,NULL,Buf); + } + + // Read: FileIDs. + unsigned Size = D.ReadInt(); + M->FileIDs.reserve(Size); + for (; Size > 0 ; --Size) + M->FileIDs.push_back(FileIDInfo::ReadVal(D)); + + // Read: MacroIDs. + Size = D.ReadInt(); + M->MacroIDs.reserve(Size); + for (; Size > 0 ; --Size) + M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); + + return M; +} diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp new file mode 100644 index 00000000000..0a561d75cb8 --- /dev/null +++ b/clang/lib/Basic/TargetInfo.cpp @@ -0,0 +1,210 @@ +//===--- TargetInfo.cpp - Information about Target machine ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetInfo and TargetInfoImpl interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/Builtins.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +using namespace clang; + +// TargetInfo Constructor. +TargetInfo::TargetInfo(const std::string &T) : Triple(T) { + // Set defaults. These should be overridden by concrete targets as needed. + CharIsSigned = true; + WCharWidth = WCharAlign = 32; + FloatFormat = &llvm::APFloat::IEEEsingle; + DoubleFormat = &llvm::APFloat::IEEEdouble; + LongDoubleFormat = &llvm::APFloat::IEEEdouble; +} + +// Out of line virtual dtor for TargetInfo. 
+TargetInfo::~TargetInfo() {} + +//===----------------------------------------------------------------------===// + + +static void removeGCCRegisterPrefix(const char *&Name) { + if (Name[0] == '%' || Name[0] == '#') + Name++; +} + +/// isValidGCCRegisterName - Returns whether the passed in string +/// is a valid register name according to GCC. This is used by Sema for +/// inline asm statements. +bool TargetInfo::isValidGCCRegisterName(const char *Name) const { + const char * const *Names; + unsigned NumNames; + + // Get rid of any register prefix. + removeGCCRegisterPrefix(Name); + + + if (strcmp(Name, "memory") == 0 || + strcmp(Name, "cc") == 0) + return true; + + getGCCRegNames(Names, NumNames); + + // If we have a number it maps to an entry in the register name array. + if (isdigit(Name[0])) { + char *End; + int n = (int)strtol(Name, &End, 0); + if (*End == 0) + return n >= 0 && (unsigned)n < NumNames; + } + + // Check register names. + for (unsigned i = 0; i < NumNames; i++) { + if (strcmp(Name, Names[i]) == 0) + return true; + } + + // Now check aliases. + const GCCRegAlias *Aliases; + unsigned NumAliases; + + getGCCRegAliases(Aliases, NumAliases); + for (unsigned i = 0; i < NumAliases; i++) { + for (unsigned j = 0 ; j < llvm::array_lengthof(Aliases[i].Aliases); j++) { + if (!Aliases[i].Aliases[j]) + break; + if (strcmp(Aliases[i].Aliases[j], Name) == 0) + return true; + } + } + + return false; +} + +const char *TargetInfo::getNormalizedGCCRegisterName(const char *Name) const { + assert(isValidGCCRegisterName(Name) && "Invalid register passed in"); + + removeGCCRegisterPrefix(Name); + + const char * const *Names; + unsigned NumNames; + + getGCCRegNames(Names, NumNames); + + // First, check if we have a number. + if (isdigit(Name[0])) { + char *End; + int n = (int)strtol(Name, &End, 0); + if (*End == 0) { + assert(n >= 0 && (unsigned)n < NumNames && + "Out of bounds register number!"); + return Names[n]; + } + } + + // Now check aliases. 
+ const GCCRegAlias *Aliases; + unsigned NumAliases; + + getGCCRegAliases(Aliases, NumAliases); + for (unsigned i = 0; i < NumAliases; i++) { + for (unsigned j = 0 ; j < llvm::array_lengthof(Aliases[i].Aliases); j++) { + if (!Aliases[i].Aliases[j]) + break; + if (strcmp(Aliases[i].Aliases[j], Name) == 0) + return Aliases[i].Register; + } + } + + return Name; +} + +bool TargetInfo::validateOutputConstraint(const char *Name, + ConstraintInfo &info) const +{ + // An output constraint must start with '=' or '+' + if (*Name != '=' && *Name != '+') + return false; + + if (*Name == '+') + info = CI_ReadWrite; + else + info = CI_None; + + Name++; + while (*Name) { + switch (*Name) { + default: + if (!validateAsmConstraint(*Name, info)) { + // FIXME: This assert is in place temporarily + // so we can add more constraints as we hit it. + // Eventually, an unknown constraint should just be treated as 'g'. + assert(0 && "Unknown output constraint type!"); + } + case '&': // early clobber. + break; + case 'r': // general register. + info = (ConstraintInfo)(info|CI_AllowsRegister); + break; + case 'm': // memory operand. + info = (ConstraintInfo)(info|CI_AllowsMemory); + break; + case 'g': // general register, memory operand or immediate integer. + info = (ConstraintInfo)(info|CI_AllowsMemory|CI_AllowsRegister); + break; + } + + Name++; + } + + return true; +} + +bool TargetInfo::validateInputConstraint(const char *Name, + unsigned NumOutputs, + ConstraintInfo &info) const { + while (*Name) { + switch (*Name) { + default: + // Check if we have a matching constraint + if (*Name >= '0' && *Name <= '9') { + unsigned i = *Name - '0'; + + // Check if matching constraint is out of bounds. + if (i >= NumOutputs) + return false; + } else if (!validateAsmConstraint(*Name, info)) { + // FIXME: This assert is in place temporarily + // so we can add more constraints as we hit it. + // Eventually, an unknown constraint should just be treated as 'g'. 
+ assert(0 && "Unknown input constraint type!"); + } + case '%': // commutative + // FIXME: Fail if % is used with the last operand. + break; + case 'i': // immediate integer. + case 'I': + case 'n': // immediate integer with a known value. + break; + case 'r': // general register. + info = (ConstraintInfo)(info|CI_AllowsRegister); + break; + case 'm': // memory operand. + info = (ConstraintInfo)(info|CI_AllowsMemory); + break; + case 'g': // general register, memory operand or immediate integer. + info = (ConstraintInfo)(info|CI_AllowsMemory|CI_AllowsRegister); + break; + } + + Name++; + } + + return true; +} diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp new file mode 100644 index 00000000000..e8238daeae3 --- /dev/null +++ b/clang/lib/Basic/Targets.cpp @@ -0,0 +1,757 @@ +//===--- Targets.cpp - Implement -arch option and targets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements construction of a TargetInfo object from a +// target triple. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Builtins.h" +#include "clang/AST/TargetBuiltins.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/STLExtras.h" + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Common code shared among targets. 
+//===----------------------------------------------------------------------===// + +static void Define(std::vector<char> &Buf, const char *Macro, + const char *Val = "1") { + const char *Def = "#define "; + Buf.insert(Buf.end(), Def, Def+strlen(Def)); + Buf.insert(Buf.end(), Macro, Macro+strlen(Macro)); + Buf.push_back(' '); + Buf.insert(Buf.end(), Val, Val+strlen(Val)); + Buf.push_back('\n'); +} + + +namespace { +class DarwinTargetInfo : public TargetInfo { +public: + DarwinTargetInfo(const std::string& triple) : TargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defs) const { +// FIXME: we need a real target configuration system. For now, only define +// __APPLE__ if the host has it. +#ifdef __APPLE__ + Define(Defs, "__APPLE__"); + Define(Defs, "__MACH__"); +#endif + + if (1) {// -fobjc-gc controls this. + Define(Defs, "__weak", ""); + Define(Defs, "__strong", ""); + } else { + Define(Defs, "__weak", "__attribute__((objc_gc(weak)))"); + Define(Defs, "__strong", "__attribute__((objc_gc(strong)))"); + Define(Defs, "__OBJC_GC__"); + } + + // darwin_constant_cfstrings controls this. + Define(Defs, "__CONSTANT_CFSTRINGS__"); + + if (0) // darwin_pascal_strings + Define(Defs, "__PASCAL_STRINGS__"); + } + +}; + + +class SolarisTargetInfo : public TargetInfo { +public: + SolarisTargetInfo(const std::string& triple) : TargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defs) const { +// FIXME: we need a real target configuration system. For now, only define +// __SUN__ if the host has it. +#ifdef __SUN__ + Define(Defs, "__SUN__"); + Define(Defs, "__SOLARIS__"); +#endif + + if (1) {// -fobjc-gc controls this. + Define(Defs, "__weak", ""); + Define(Defs, "__strong", ""); + } else { + Define(Defs, "__weak", "__attribute__((objc_gc(weak)))"); + Define(Defs, "__strong", "__attribute__((objc_gc(strong)))"); + Define(Defs, "__OBJC_GC__"); + } + } + +}; +} // end anonymous namespace. 
+ + +/// getPowerPCDefines - Return a set of the PowerPC-specific #defines that are +/// not tied to a specific subtarget. +static void getPowerPCDefines(std::vector<char> &Defs, bool is64Bit) { + // Target identification. + Define(Defs, "__ppc__"); + Define(Defs, "_ARCH_PPC"); + Define(Defs, "__POWERPC__"); + if (is64Bit) { + Define(Defs, "_ARCH_PPC64"); + Define(Defs, "_LP64"); + Define(Defs, "__LP64__"); + Define(Defs, "__ppc64__"); + } else { + Define(Defs, "__ppc__"); + } + + // Target properties. + Define(Defs, "_BIG_ENDIAN"); + Define(Defs, "__BIG_ENDIAN__"); + + if (is64Bit) { + Define(Defs, "__INTMAX_MAX__", "9223372036854775807L"); + Define(Defs, "__INTMAX_TYPE__", "long int"); + Define(Defs, "__LONG_MAX__", "9223372036854775807L"); + Define(Defs, "__PTRDIFF_TYPE__", "long int"); + Define(Defs, "__UINTMAX_TYPE__", "long unsigned int"); + } else { + Define(Defs, "__INTMAX_MAX__", "9223372036854775807LL"); + Define(Defs, "__INTMAX_TYPE__", "long long int"); + Define(Defs, "__LONG_MAX__", "2147483647L"); + Define(Defs, "__PTRDIFF_TYPE__", "int"); + Define(Defs, "__UINTMAX_TYPE__", "long long unsigned int"); + } + Define(Defs, "__INT_MAX__", "2147483647"); + Define(Defs, "__LONG_LONG_MAX__", "9223372036854775807LL"); + Define(Defs, "__CHAR_BIT__", "8"); + Define(Defs, "__SCHAR_MAX__", "127"); + Define(Defs, "__SHRT_MAX__", "32767"); + Define(Defs, "__SIZE_TYPE__", "long unsigned int"); + + // Subtarget options. + Define(Defs, "__USER_LABEL_PREFIX__", "_"); + Define(Defs, "__NATURAL_ALIGNMENT__"); + Define(Defs, "__REGISTER_PREFIX__", ""); + + Define(Defs, "__WCHAR_MAX__", "2147483647"); + Define(Defs, "__WCHAR_TYPE__", "int"); + Define(Defs, "__WINT_TYPE__", "int"); + + // Float macros. 
+ Define(Defs, "__FLT_DENORM_MIN__", "1.40129846e-45F"); + Define(Defs, "__FLT_DIG__", "6"); + Define(Defs, "__FLT_EPSILON__", "1.19209290e-7F"); + Define(Defs, "__FLT_EVAL_METHOD__", "0"); + Define(Defs, "__FLT_HAS_INFINITY__"); + Define(Defs, "__FLT_HAS_QUIET_NAN__"); + Define(Defs, "__FLT_MANT_DIG__", "24"); + Define(Defs, "__FLT_MAX_10_EXP__", "38"); + Define(Defs, "__FLT_MAX_EXP__", "128"); + Define(Defs, "__FLT_MAX__", "3.40282347e+38F"); + Define(Defs, "__FLT_MIN_10_EXP__", "(-37)"); + Define(Defs, "__FLT_MIN_EXP__", "(-125)"); + Define(Defs, "__FLT_MIN__", "1.17549435e-38F"); + Define(Defs, "__FLT_RADIX__", "2"); + + // double macros. + Define(Defs, "__DBL_DENORM_MIN__", "4.9406564584124654e-324"); + Define(Defs, "__DBL_DIG__", "15"); + Define(Defs, "__DBL_EPSILON__", "2.2204460492503131e-16"); + Define(Defs, "__DBL_HAS_INFINITY__"); + Define(Defs, "__DBL_HAS_QUIET_NAN__"); + Define(Defs, "__DBL_MANT_DIG__", "53"); + Define(Defs, "__DBL_MAX_10_EXP__", "308"); + Define(Defs, "__DBL_MAX_EXP__", "1024"); + Define(Defs, "__DBL_MAX__", "1.7976931348623157e+308"); + Define(Defs, "__DBL_MIN_10_EXP__", "(-307)"); + Define(Defs, "__DBL_MIN_EXP__", "(-1021)"); + Define(Defs, "__DBL_MIN__", "2.2250738585072014e-308"); + Define(Defs, "__DECIMAL_DIG__", "33"); + + // 128-bit long double macros. 
+ Define(Defs, "__LDBL_DENORM_MIN__", + "4.94065645841246544176568792868221e-324L"); + Define(Defs, "__LDBL_DIG__", "31"); + Define(Defs, "__LDBL_EPSILON__", + "4.94065645841246544176568792868221e-324L"); + Define(Defs, "__LDBL_HAS_INFINITY__"); + Define(Defs, "__LDBL_HAS_QUIET_NAN__"); + Define(Defs, "__LDBL_MANT_DIG__", "106"); + Define(Defs, "__LDBL_MAX_10_EXP__", "308"); + Define(Defs, "__LDBL_MAX_EXP__", "1024"); + Define(Defs, "__LDBL_MAX__", + "1.79769313486231580793728971405301e+308L"); + Define(Defs, "__LDBL_MIN_10_EXP__", "(-291)"); + Define(Defs, "__LDBL_MIN_EXP__", "(-968)"); + Define(Defs, "__LDBL_MIN__", + "2.00416836000897277799610805135016e-292L"); + Define(Defs, "__LONG_DOUBLE_128__"); +} + +/// getX86Defines - Return a set of the X86-specific #defines that are +/// not tied to a specific subtarget. +static void getX86Defines(std::vector<char> &Defs, bool is64Bit) { + // Target identification. + if (is64Bit) { + Define(Defs, "_LP64"); + Define(Defs, "__LP64__"); + Define(Defs, "__amd64__"); + Define(Defs, "__amd64"); + Define(Defs, "__x86_64"); + Define(Defs, "__x86_64__"); + } else { + Define(Defs, "__i386__"); + Define(Defs, "__i386"); + Define(Defs, "i386"); + } + + // Target properties. 
+ Define(Defs, "__LITTLE_ENDIAN__"); + + if (is64Bit) { + Define(Defs, "__INTMAX_MAX__", "9223372036854775807L"); + Define(Defs, "__INTMAX_TYPE__", "long int"); + Define(Defs, "__LONG_MAX__", "9223372036854775807L"); + Define(Defs, "__PTRDIFF_TYPE__", "long int"); + Define(Defs, "__UINTMAX_TYPE__", "long unsigned int"); + Define(Defs, "__SIZE_TYPE__", "long unsigned int"); + } else { + Define(Defs, "__INTMAX_MAX__", "9223372036854775807LL"); + Define(Defs, "__INTMAX_TYPE__", "long long int"); + Define(Defs, "__LONG_MAX__", "2147483647L"); + Define(Defs, "__PTRDIFF_TYPE__", "int"); + Define(Defs, "__UINTMAX_TYPE__", "long long unsigned int"); + Define(Defs, "__SIZE_TYPE__", "unsigned int"); + } + Define(Defs, "__CHAR_BIT__", "8"); + Define(Defs, "__INT_MAX__", "2147483647"); + Define(Defs, "__LONG_LONG_MAX__", "9223372036854775807LL"); + Define(Defs, "__SCHAR_MAX__", "127"); + Define(Defs, "__SHRT_MAX__", "32767"); + + // Subtarget options. + Define(Defs, "__nocona"); + Define(Defs, "__nocona__"); + Define(Defs, "__tune_nocona__"); + Define(Defs, "__SSE2_MATH__"); + Define(Defs, "__SSE2__"); + Define(Defs, "__SSE_MATH__"); + Define(Defs, "__SSE__"); + Define(Defs, "__MMX__"); + Define(Defs, "__REGISTER_PREFIX__", ""); + + Define(Defs, "__WCHAR_MAX__", "2147483647"); + Define(Defs, "__WCHAR_TYPE__", "int"); + Define(Defs, "__WINT_TYPE__", "int"); + + // Float macros. 
+ Define(Defs, "__FLT_DENORM_MIN__", "1.40129846e-45F"); + Define(Defs, "__FLT_DIG__", "6"); + Define(Defs, "__FLT_EPSILON__", "1.19209290e-7F"); + Define(Defs, "__FLT_EVAL_METHOD__", "0"); + Define(Defs, "__FLT_HAS_INFINITY__"); + Define(Defs, "__FLT_HAS_QUIET_NAN__"); + Define(Defs, "__FLT_MANT_DIG__", "24"); + Define(Defs, "__FLT_MAX_10_EXP__", "38"); + Define(Defs, "__FLT_MAX_EXP__", "128"); + Define(Defs, "__FLT_MAX__", "3.40282347e+38F"); + Define(Defs, "__FLT_MIN_10_EXP__", "(-37)"); + Define(Defs, "__FLT_MIN_EXP__", "(-125)"); + Define(Defs, "__FLT_MIN__", "1.17549435e-38F"); + Define(Defs, "__FLT_RADIX__", "2"); + + // Double macros. + Define(Defs, "__DBL_DENORM_MIN__", "4.9406564584124654e-324"); + Define(Defs, "__DBL_DIG__", "15"); + Define(Defs, "__DBL_EPSILON__", "2.2204460492503131e-16"); + Define(Defs, "__DBL_HAS_INFINITY__"); + Define(Defs, "__DBL_HAS_QUIET_NAN__"); + Define(Defs, "__DBL_MANT_DIG__", "53"); + Define(Defs, "__DBL_MAX_10_EXP__", "308"); + Define(Defs, "__DBL_MAX_EXP__", "1024"); + Define(Defs, "__DBL_MAX__", "1.7976931348623157e+308"); + Define(Defs, "__DBL_MIN_10_EXP__", "(-307)"); + Define(Defs, "__DBL_MIN_EXP__", "(-1021)"); + Define(Defs, "__DBL_MIN__", "2.2250738585072014e-308"); + Define(Defs, "__DECIMAL_DIG__", "21"); + + // 80-bit Long double macros. 
+ Define(Defs, "__LDBL_DENORM_MIN__", "3.64519953188247460253e-4951L"); + Define(Defs, "__LDBL_DIG__", "18"); + Define(Defs, "__LDBL_EPSILON__", "1.08420217248550443401e-19L"); + Define(Defs, "__LDBL_HAS_INFINITY__"); + Define(Defs, "__LDBL_HAS_QUIET_NAN__"); + Define(Defs, "__LDBL_MANT_DIG__", "64"); + Define(Defs, "__LDBL_MAX_10_EXP__", "4932"); + Define(Defs, "__LDBL_MAX_EXP__", "16384"); + Define(Defs, "__LDBL_MAX__", "1.18973149535723176502e+4932L"); + Define(Defs, "__LDBL_MIN_10_EXP__", "(-4931)"); + Define(Defs, "__LDBL_MIN_EXP__", "(-16381)"); + Define(Defs, "__LDBL_MIN__", "3.36210314311209350626e-4932L"); +} + +static const char* getI386VAListDeclaration() { + return "typedef char* __builtin_va_list;"; +} + +static const char* getX86_64VAListDeclaration() { + return + "typedef struct __va_list_tag {" + " unsigned gp_offset;" + " unsigned fp_offset;" + " void* overflow_arg_area;" + " void* reg_save_area;" + "} __builtin_va_list[1];"; +} + +static const char* getPPCVAListDeclaration() { + return + "typedef struct __va_list_tag {" + " unsigned char gpr;" + " unsigned char fpr;" + " unsigned short reserved;" + " void* overflow_arg_area;" + " void* reg_save_area;" + "} __builtin_va_list[1];"; +} + + +/// PPC builtin info. 
+namespace clang { +namespace PPC { + + static const Builtin::Info BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS }, +#include "clang/AST/PPCBuiltins.def" + }; + + static void getBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) { + Records = BuiltinInfo; + NumRecords = LastTSBuiltin-Builtin::FirstTSBuiltin; + } + + static const char * const GCCRegNames[] = { + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "mq", "lr", "ctr", "ap", + "0", "1", "2", "3", "4", "5", "6", "7", + "xer", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "vrsave", "vscr", + "spe_acc", "spefscr", + "sfp" + }; + + static void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) { + Names = GCCRegNames; + NumNames = llvm::array_lengthof(GCCRegNames); + } + + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + // While some of these aliases do map to different registers + // they still share the same register name. 
+ { { "cc", "cr0", "fr0", "r0", "v0"}, "0" }, + { { "cr1", "fr1", "r1", "sp", "v1"}, "1" }, + { { "cr2", "fr2", "r2", "toc", "v2"}, "2" }, + { { "cr3", "fr3", "r3", "v3"}, "3" }, + { { "cr4", "fr4", "r4", "v4"}, "4" }, + { { "cr5", "fr5", "r5", "v5"}, "5" }, + { { "cr6", "fr6", "r6", "v6"}, "6" }, + { { "cr7", "fr7", "r7", "v7"}, "7" }, + { { "fr8", "r8", "v8"}, "8" }, + { { "fr9", "r9", "v9"}, "9" }, + { { "fr10", "r10", "v10"}, "10" }, + { { "fr11", "r11", "v11"}, "11" }, + { { "fr12", "r12", "v12"}, "12" }, + { { "fr13", "r13", "v13"}, "13" }, + { { "fr14", "r14", "v14"}, "14" }, + { { "fr15", "r15", "v15"}, "15" }, + { { "fr16", "r16", "v16"}, "16" }, + { { "fr17", "r17", "v17"}, "17" }, + { { "fr18", "r18", "v18"}, "18" }, + { { "fr19", "r19", "v19"}, "19" }, + { { "fr20", "r20", "v20"}, "20" }, + { { "fr21", "r21", "v21"}, "21" }, + { { "fr22", "r22", "v22"}, "22" }, + { { "fr23", "r23", "v23"}, "23" }, + { { "fr24", "r24", "v24"}, "24" }, + { { "fr25", "r25", "v25"}, "25" }, + { { "fr26", "r26", "v26"}, "26" }, + { { "fr27", "r27", "v27"}, "27" }, + { { "fr28", "r28", "v28"}, "28" }, + { { "fr29", "r29", "v29"}, "29" }, + { { "fr30", "r30", "v30"}, "30" }, + { { "fr31", "r31", "v31"}, "31" }, + }; + + static void getGCCRegAliases(const TargetInfo::GCCRegAlias *&Aliases, + unsigned &NumAliases) { + Aliases = GCCRegAliases; + NumAliases = llvm::array_lengthof(GCCRegAliases); + } + + static bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) { + switch (c) { + default: return false; + case 'O': // Zero + return true; + case 'b': // Base register + case 'f': // Floating point register + info = (TargetInfo::ConstraintInfo)(info|TargetInfo::CI_AllowsRegister); + return true; + } + } + + const char *getClobbers() { + return 0; + } + + const char *getTargetPrefix() { + return "ppc"; + } + +} // End namespace PPC + +/// X86 builtin info. 
+namespace X86 { + static const Builtin::Info BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS }, +#include "clang/AST/X86Builtins.def" + }; + + static void getBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) { + Records = BuiltinInfo; + NumRecords = LastTSBuiltin-Builtin::FirstTSBuiltin; + } + + static const char *GCCRegNames[] = { + "ax", "dx", "cx", "bx", "si", "di", "bp", "sp", + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", + "argp", "flags", "fspr", "dirflag", "frame", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" + }; + + static void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) { + Names = GCCRegNames; + NumNames = llvm::array_lengthof(GCCRegNames); + } + + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + { { "al", "ah", "eax", "rax" }, "ax" }, + { { "bl", "bh", "ebx", "rbx" }, "bx" }, + { { "cl", "ch", "ecx", "rcx" }, "cx" }, + { { "dl", "dh", "edx", "rdx" }, "dx" }, + { { "esi", "rsi" }, "si" }, + { { "esp", "rsp" }, "sp" }, + { { "ebp", "rbp" }, "bp" }, + }; + + static void getGCCRegAliases(const TargetInfo::GCCRegAlias *&Aliases, + unsigned &NumAliases) { + Aliases = GCCRegAliases; + NumAliases = llvm::array_lengthof(GCCRegAliases); + } + + static bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) { + switch (c) { + default: return false; + case 'a': // eax. + case 'b': // ebx. + case 'c': // ecx. + case 'd': // edx. + case 'S': // esi. + case 'D': // edi. + case 'A': // edx:eax. + case 't': // top of floating point stack. + case 'u': // second from top of floating point stack. + case 'q': // a, b, c, d registers or any integer register in 64-bit. + case 'Z': // 32-bit integer constant for use with zero-extending x86_64 + // instructions. 
+ case 'N': // unsigned 8-bit integer constant for use with in and out + // instructions. + info = (TargetInfo::ConstraintInfo)(info|TargetInfo::CI_AllowsRegister); + return true; + } + } + + static std::string convertConstraint(const char Constraint) { + switch (Constraint) { + case 'a': return std::string("{ax}"); + case 'b': return std::string("{bx}"); + case 'c': return std::string("{cx}"); + case 'd': return std::string("{dx}"); + case 'S': return std::string("{si}"); + case 'D': return std::string("{di}"); + case 't': // top of floating point stack. + return std::string("{st}"); + case 'u': // second from top of floating point stack. + return std::string("{st(1)}"); // second from top of floating point stack. + default: + return std::string(1, Constraint); + } + } + + const char *getClobbers() { + return "~{dirflag},~{fpsr},~{flags}"; + } + + const char *getTargetPrefix() { + return "x86"; + } + +} // End namespace X86 +} // end namespace clang. + +//===----------------------------------------------------------------------===// +// Specific target implementations. 
+//===----------------------------------------------------------------------===// + + +namespace { +class DarwinPPCTargetInfo : public DarwinTargetInfo { +public: + DarwinPPCTargetInfo(const std::string& triple) : DarwinTargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getPowerPCDefines(Defines, false); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + PPC::getBuiltins(Records, NumRecords); + } + virtual const char *getVAListDeclaration() const { + return getPPCVAListDeclaration(); + } + virtual const char *getTargetPrefix() const { + return PPC::getTargetPrefix(); + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + PPC::getGCCRegNames(Names, NumNames); + } + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + PPC::getGCCRegAliases(Aliases, NumAliases); + } + virtual bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) const { + return PPC::validateAsmConstraint(c, info); + } + virtual const char *getClobbers() const { + return PPC::getClobbers(); + } +}; +} // end anonymous namespace. 
+ +namespace { +class DarwinPPC64TargetInfo : public DarwinTargetInfo { +public: + DarwinPPC64TargetInfo(const std::string& triple) : DarwinTargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getPowerPCDefines(Defines, true); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + PPC::getBuiltins(Records, NumRecords); + } + virtual const char *getVAListDeclaration() const { + return getPPCVAListDeclaration(); + } + virtual const char *getTargetPrefix() const { + return PPC::getTargetPrefix(); + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + PPC::getGCCRegNames(Names, NumNames); + } + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + PPC::getGCCRegAliases(Aliases, NumAliases); + } + virtual bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) const { + return PPC::validateAsmConstraint(c, info); + } + virtual const char *getClobbers() const { + return PPC::getClobbers(); + } +}; +} // end anonymous namespace. 
+ +namespace { +class DarwinI386TargetInfo : public DarwinTargetInfo { +public: + DarwinI386TargetInfo(const std::string& triple) : DarwinTargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getX86Defines(Defines, false); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + X86::getBuiltins(Records, NumRecords); + } + virtual const char *getVAListDeclaration() const { + return getI386VAListDeclaration(); + } + virtual const char *getTargetPrefix() const { + return X86::getTargetPrefix(); + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + X86::getGCCRegNames(Names, NumNames); + } + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + X86::getGCCRegAliases(Aliases, NumAliases); + } + virtual bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) const { + return X86::validateAsmConstraint(c, info); + } + + virtual std::string convertConstraint(const char Constraint) const { + return X86::convertConstraint(Constraint); + } + + virtual const char *getClobbers() const { + return X86::getClobbers(); + } +}; +} // end anonymous namespace. 
+ +namespace { +class DarwinX86_64TargetInfo : public DarwinTargetInfo { +public: + DarwinX86_64TargetInfo(const std::string& triple) :DarwinTargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getX86Defines(Defines, true); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + X86::getBuiltins(Records, NumRecords); + } + virtual const char *getVAListDeclaration() const { + return getX86_64VAListDeclaration(); + } + virtual const char *getTargetPrefix() const { + return X86::getTargetPrefix(); + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + X86::getGCCRegNames(Names, NumNames); + } + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + X86::getGCCRegAliases(Aliases, NumAliases); + } + virtual bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) const { + return X86::validateAsmConstraint(c, info); + } + virtual std::string convertConstraint(const char Constraint) const { + return X86::convertConstraint(Constraint); + } + virtual const char *getClobbers() const { + return X86::getClobbers(); + } +}; +} // end anonymous namespace. 
+ +namespace { +class SolarisSparcV8TargetInfo : public SolarisTargetInfo { +public: + SolarisSparcV8TargetInfo(const std::string& triple) : SolarisTargetInfo(triple) {} + + virtual void getTargetDefines(std::vector<char> &Defines) const { + SolarisTargetInfo::getTargetDefines(Defines); +// getSparcDefines(Defines, false); + Define(Defines, "__sparc"); + Define(Defines, "__sparcv8"); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + PPC::getBuiltins(Records, NumRecords); + } + virtual const char *getVAListDeclaration() const { + return getPPCVAListDeclaration(); + } + virtual const char *getTargetPrefix() const { + return PPC::getTargetPrefix(); + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + PPC::getGCCRegNames(Names, NumNames); + } + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + PPC::getGCCRegAliases(Aliases, NumAliases); + } + virtual bool validateAsmConstraint(char c, + TargetInfo::ConstraintInfo &info) const { + return PPC::validateAsmConstraint(c, info); + } + virtual const char *getClobbers() const { + return PPC::getClobbers(); + } +}; + +} // end anonymous namespace. + + +//===----------------------------------------------------------------------===// +// Driver code +//===----------------------------------------------------------------------===// + +static inline bool IsX86(const std::string& TT) { + return (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && + TT[4] == '-' && TT[1] - '3' < 6); +} + +/// CreateTargetInfo - Return the target info object for the specified target +/// triple. 
+TargetInfo* TargetInfo::CreateTargetInfo(const std::string &T) { + if (T.find("ppc-") == 0 || T.find("powerpc-") == 0) + return new DarwinPPCTargetInfo(T); + + if (T.find("ppc64-") == 0 || T.find("powerpc64-") == 0) + return new DarwinPPC64TargetInfo(T); + + if (T.find("sparc-") == 0) + return new SolarisSparcV8TargetInfo(T); // ugly hack + + if (T.find("x86_64-") == 0) + return new DarwinX86_64TargetInfo(T); + + if (IsX86(T)) + return new DarwinI386TargetInfo(T); + + return NULL; +} + diff --git a/clang/lib/Basic/TokenKinds.cpp b/clang/lib/Basic/TokenKinds.cpp new file mode 100644 index 00000000000..bde8a5598b4 --- /dev/null +++ b/clang/lib/Basic/TokenKinds.cpp @@ -0,0 +1,29 @@ +//===--- TokenKinds.cpp - Token Kinds Support -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TokenKind enum and support functions. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/TokenKinds.h" + +#include <cassert> +using namespace clang; + +static const char * const TokNames[] = { +#define TOK(X) #X, +#define KEYWORD(X,Y) #X, +#include "clang/Basic/TokenKinds.def" + 0 +}; + +const char *tok::getTokenName(enum TokenKind Kind) { + assert(Kind < tok::NUM_TOKENS); + return TokNames[Kind]; +} diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp new file mode 100644 index 00000000000..83c5e60475c --- /dev/null +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -0,0 +1,486 @@ +//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Builtin calls as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Builtins.h" +#include "clang/AST/Expr.h" +#include "clang/AST/TargetBuiltins.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +using namespace clang; +using namespace CodeGen; +using namespace llvm; + +RValue CodeGenFunction::EmitBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + switch (BuiltinID) { + default: { + if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + return EmitCallExpr(CGM.getBuiltinLibFunction(BuiltinID), + E->getCallee()->getType(), E->arg_begin(), + E->getNumArgs()); + + // See if we have a target specific intrinsic. + Intrinsic::ID IntrinsicID; + const char *TargetPrefix = Target.getTargetPrefix(); + const char *BuiltinName = getContext().BuiltinInfo.GetName(BuiltinID); +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "llvm/Intrinsics.gen" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + + if (IntrinsicID != Intrinsic::not_intrinsic) { + SmallVector<Value*, 16> Args; + + Function *F = CGM.getIntrinsic(IntrinsicID); + const llvm::FunctionType *FTy = F->getFunctionType(); + + for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { + Value *ArgValue = EmitScalarExpr(E->getArg(i)); + + // If the intrinsic arg type is different from the builtin arg type + // we need to do a bit cast. 
+ const llvm::Type *PTy = FTy->getParamType(i); + if (PTy != ArgValue->getType()) { + assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && + "Must be able to losslessly bit cast to param"); + ArgValue = Builder.CreateBitCast(ArgValue, PTy); + } + + Args.push_back(ArgValue); + } + + Value *V = Builder.CreateCall(F, &Args[0], &Args[0] + Args.size()); + QualType BuiltinRetType = E->getType(); + + const llvm::Type *RetTy = llvm::Type::VoidTy; + if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType); + + if (RetTy != V->getType()) { + assert(V->getType()->canLosslesslyBitCastTo(RetTy) && + "Must be able to losslessly bit cast result type"); + V = Builder.CreateBitCast(V, RetTy); + } + + return RValue::get(V); + } + + // See if we have a target specific builtin that needs to be lowered. + Value *V = 0; + + if (strcmp(TargetPrefix, "x86") == 0) + V = EmitX86BuiltinExpr(BuiltinID, E); + else if (strcmp(TargetPrefix, "ppc") == 0) + V = EmitPPCBuiltinExpr(BuiltinID, E); + + if (V) + return RValue::get(V); + + WarnUnsupported(E, "builtin function"); + + // Unknown builtin, for now just dump it out and return undef. 
+ if (hasAggregateLLVMType(E->getType())) + return RValue::getAggregate(CreateTempAlloca(ConvertType(E->getType()))); + return RValue::get(UndefValue::get(ConvertType(E->getType()))); + } + case Builtin::BI__builtin___CFStringMakeConstantString: { + const Expr *Arg = E->getArg(0); + + while (1) { + if (const ParenExpr *PE = dyn_cast<ParenExpr>(Arg)) + Arg = PE->getSubExpr(); + else if (const ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(Arg)) + Arg = CE->getSubExpr(); + else + break; + } + + const StringLiteral *Literal = cast<StringLiteral>(Arg); + std::string S(Literal->getStrData(), Literal->getByteLength()); + + return RValue::get(CGM.GetAddrOfConstantCFString(S)); + } + case Builtin::BI__builtin_va_start: + case Builtin::BI__builtin_va_end: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + const llvm::Type *DestType = + llvm::PointerType::getUnqual(llvm::Type::Int8Ty); + if (ArgValue->getType() != DestType) + ArgValue = Builder.CreateBitCast(ArgValue, DestType, + ArgValue->getNameStart()); + + Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_start) ? + Intrinsic::vastart : Intrinsic::vaend; + return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); + } + case Builtin::BI__builtin_va_copy: { + // FIXME: This does not yet handle architectures where va_list is a struct. 
+ Value *DstPtr = EmitScalarExpr(E->getArg(0)); + Value *SrcValue = EmitScalarExpr(E->getArg(1)); + + Value *SrcPtr = CreateTempAlloca(SrcValue->getType(), "dst_ptr"); + + // FIXME: Volatile + Builder.CreateStore(SrcValue, SrcPtr, false); + + const llvm::Type *Type = + llvm::PointerType::getUnqual(llvm::Type::Int8Ty); + + DstPtr = Builder.CreateBitCast(DstPtr, Type); + SrcPtr = Builder.CreateBitCast(SrcPtr, Type); + Value *Args[] = { DstPtr, SrcPtr }; + return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), + &Args[0], &Args[2])); + } + case Builtin::BI__builtin_classify_type: { + APSInt Result(32); + if (!E->isBuiltinClassifyType(Result)) + assert(0 && "Expr not __builtin_classify_type!"); + return RValue::get(ConstantInt::get(Result)); + } + case Builtin::BI__builtin_constant_p: { + APSInt Result(32); + // FIXME: Analyze the parameter and check if it is a constant. + Result = 0; + return RValue::get(ConstantInt::get(Result)); + } + case Builtin::BI__builtin_abs: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::BinaryOperator *NegOp = + Builder.CreateNeg(ArgValue, (ArgValue->getName() + "neg").c_str()); + Value *CmpResult = + Builder.CreateICmpSGE(ArgValue, NegOp->getOperand(0), "abscond"); + Value *Result = + Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); + + return RValue::get(Result); + } + case Builtin::BI__builtin_ctz: + case Builtin::BI__builtin_ctzl: + case Builtin::BI__builtin_ctzll: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + const llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1); + + const llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F, ArgValue, "tmp"); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, "cast"); + return RValue::get(Result); + } + case Builtin::BI__builtin_expect: + return RValue::get(EmitScalarExpr(E->getArg(0))); + case 
Builtin::BI__builtin_bswap32: + case Builtin::BI__builtin_bswap64: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + const llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1); + return RValue::get(Builder.CreateCall(F, ArgValue, "tmp")); + } + case Builtin::BI__builtin_inff: { + APFloat f(APFloat::IEEEsingle, APFloat::fcInfinity, false); + return RValue::get(ConstantFP::get(llvm::Type::FloatTy, f)); + } + case Builtin::BI__builtin_huge_val: + case Builtin::BI__builtin_inf: + // FIXME: mapping long double onto double. + case Builtin::BI__builtin_infl: { + APFloat f(APFloat::IEEEdouble, APFloat::fcInfinity, false); + return RValue::get(ConstantFP::get(llvm::Type::DoubleTy, f)); + } + case Builtin::BI__builtin_isgreater: + case Builtin::BI__builtin_isgreaterequal: + case Builtin::BI__builtin_isless: + case Builtin::BI__builtin_islessequal: + case Builtin::BI__builtin_islessgreater: + case Builtin::BI__builtin_isunordered: { + // Ordered comparisons: we know the arguments to these are matching scalar + // floating point values. + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + + switch (BuiltinID) { + default: assert(0 && "Unknown ordered comparison"); + case Builtin::BI__builtin_isgreater: + LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); + break; + case Builtin::BI__builtin_isgreaterequal: + LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); + break; + case Builtin::BI__builtin_isless: + LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); + break; + case Builtin::BI__builtin_islessequal: + LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); + break; + case Builtin::BI__builtin_islessgreater: + LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); + break; + case Builtin::BI__builtin_isunordered: + LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); + break; + } + // ZExt bool to int type. 
+ return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()), + "tmp")); + } + case Builtin::BI__builtin_alloca: + return RValue::get(Builder.CreateAlloca(llvm::Type::Int8Ty, + EmitScalarExpr(E->getArg(0)), + "tmp")); + } + return RValue::get(0); +} + +Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + + llvm::SmallVector<Value*, 4> Ops; + + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + + switch (BuiltinID) { + default: return 0; + case X86::BI__builtin_ia32_mulps: + return Builder.CreateMul(Ops[0], Ops[1], "mulps"); + case X86::BI__builtin_ia32_pand: + return Builder.CreateAnd(Ops[0], Ops[1], "pand"); + case X86::BI__builtin_ia32_por: + return Builder.CreateAnd(Ops[0], Ops[1], "por"); + case X86::BI__builtin_ia32_pxor: + return Builder.CreateAnd(Ops[0], Ops[1], "pxor"); + case X86::BI__builtin_ia32_pandn: { + Ops[0] = Builder.CreateNot(Ops[0], "tmp"); + return Builder.CreateAnd(Ops[0], Ops[1], "pandn"); + } + case X86::BI__builtin_ia32_paddb: + case X86::BI__builtin_ia32_paddd: + case X86::BI__builtin_ia32_paddq: + case X86::BI__builtin_ia32_paddw: + case X86::BI__builtin_ia32_addps: + return Builder.CreateAdd(Ops[0], Ops[1], "add"); + case X86::BI__builtin_ia32_psubb: + case X86::BI__builtin_ia32_psubd: + case X86::BI__builtin_ia32_psubq: + case X86::BI__builtin_ia32_psubw: + case X86::BI__builtin_ia32_subps: + return Builder.CreateSub(Ops[0], Ops[1], "sub"); + case X86::BI__builtin_ia32_divps: + return Builder.CreateFDiv(Ops[0], Ops[1], "divps"); + case X86::BI__builtin_ia32_pmullw: + return Builder.CreateMul(Ops[0], Ops[1], "pmul"); + case X86::BI__builtin_ia32_punpckhbw: + return EmitShuffleVector(Ops[0], Ops[1], 4, 12, 5, 13, 6, 14, 7, 15, + "punpckhbw"); + case X86::BI__builtin_ia32_punpckhwd: + return EmitShuffleVector(Ops[0], Ops[1], 2, 6, 3, 7, "punpckhwd"); + case X86::BI__builtin_ia32_punpckhdq: + return EmitShuffleVector(Ops[0], Ops[1], 1, 3, 
"punpckhdq"); + case X86::BI__builtin_ia32_punpcklbw: + return EmitShuffleVector(Ops[0], Ops[1], 0, 8, 1, 9, 2, 10, 3, 11, + "punpcklbw"); + case X86::BI__builtin_ia32_punpcklwd: + return EmitShuffleVector(Ops[0], Ops[1], 0, 4, 1, 5, "punpcklwd"); + case X86::BI__builtin_ia32_punpckldq: + return EmitShuffleVector(Ops[0], Ops[1], 0, 2, "punpckldq"); + case X86::BI__builtin_ia32_pslldi: + case X86::BI__builtin_ia32_psllqi: + case X86::BI__builtin_ia32_psllwi: + case X86::BI__builtin_ia32_psradi: + case X86::BI__builtin_ia32_psrawi: + case X86::BI__builtin_ia32_psrldi: + case X86::BI__builtin_ia32_psrlqi: + case X86::BI__builtin_ia32_psrlwi: { + Ops[1] = Builder.CreateZExt(Ops[1], llvm::Type::Int64Ty, "zext"); + const llvm::Type *Ty = llvm::VectorType::get(llvm::Type::Int64Ty, 1); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast"); + const char *name = 0; + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: assert(0 && "Unsupported shift intrinsic!"); + case X86::BI__builtin_ia32_pslldi: + name = "pslldi"; + ID = Intrinsic::x86_mmx_psll_d; + break; + case X86::BI__builtin_ia32_psllqi: + name = "psllqi"; + ID = Intrinsic::x86_mmx_psll_q; + break; + case X86::BI__builtin_ia32_psllwi: + name = "psllwi"; + ID = Intrinsic::x86_mmx_psll_w; + break; + case X86::BI__builtin_ia32_psradi: + name = "psradi"; + ID = Intrinsic::x86_mmx_psra_d; + break; + case X86::BI__builtin_ia32_psrawi: + name = "psrawi"; + ID = Intrinsic::x86_mmx_psra_w; + break; + case X86::BI__builtin_ia32_psrldi: + name = "psrldi"; + ID = Intrinsic::x86_mmx_psrl_d; + break; + case X86::BI__builtin_ia32_psrlqi: + name = "psrlqi"; + ID = Intrinsic::x86_mmx_psrl_q; + break; + case X86::BI__builtin_ia32_psrlwi: + name = "psrlwi"; + ID = Intrinsic::x86_mmx_psrl_w; + break; + } + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); + } + case X86::BI__builtin_ia32_pshufd: { + unsigned i = 
cast<ConstantInt>(Ops[1])->getZExtValue(); + return EmitShuffleVector(Ops[0], Ops[0], + i & 0x3, (i & 0xc) >> 2, + (i & 0x30) >> 4, (i & 0xc0) >> 6, + "pshufd"); + } + case X86::BI__builtin_ia32_vec_init_v4hi: + case X86::BI__builtin_ia32_vec_init_v8qi: + case X86::BI__builtin_ia32_vec_init_v2si: + return EmitVector(&Ops[0], Ops.size()); + case X86::BI__builtin_ia32_vec_ext_v2si: + return Builder.CreateExtractElement(Ops[0], Ops[1], "result"); + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: { + unsigned i = 0; + const char *name = 0; + switch (BuiltinID) { + default: assert(0 && "Unknown compare builtin!"); + case X86::BI__builtin_ia32_cmpeqss: + i = 0; + name = "cmpeqss"; + break; + case X86::BI__builtin_ia32_cmpltss: + i = 1; + name = "cmpltss"; + break; + case X86::BI__builtin_ia32_cmpless: + i = 2; + name = "cmpless"; + break; + case X86::BI__builtin_ia32_cmpunordss: + i = 3; + name = "cmpunordss"; + break; + case X86::BI__builtin_ia32_cmpneqss: + i = 4; + name = "cmpneqss"; + break; + case X86::BI__builtin_ia32_cmpnltss: + i = 5; + name = "cmpntlss"; + break; + case X86::BI__builtin_ia32_cmpnless: + i = 6; + name = "cmpnless"; + break; + case X86::BI__builtin_ia32_cmpordss: + i = 7; + name = "cmpordss"; + break; + } + + Ops.push_back(llvm::ConstantInt::get(llvm::Type::Int8Ty, i)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); + } + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpngtps: + case 
X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpgtps: + case X86::BI__builtin_ia32_cmpgeps: + case X86::BI__builtin_ia32_cmpngeps: + case X86::BI__builtin_ia32_cmpnleps: { + unsigned i = 0; + const char *name = 0; + bool ShouldSwap = false; + switch (BuiltinID) { + default: assert(0 && "Unknown compare builtin!"); + case X86::BI__builtin_ia32_cmpeqps: i = 0; name = "cmpeqps"; break; + case X86::BI__builtin_ia32_cmpltps: i = 1; name = "cmpltps"; break; + case X86::BI__builtin_ia32_cmpleps: i = 2; name = "cmpleps"; break; + case X86::BI__builtin_ia32_cmpunordps: i = 3; name = "cmpunordps"; break; + case X86::BI__builtin_ia32_cmpneqps: i = 4; name = "cmpneqps"; break; + case X86::BI__builtin_ia32_cmpnltps: i = 5; name = "cmpntlps"; break; + case X86::BI__builtin_ia32_cmpnleps: i = 6; name = "cmpnleps"; break; + case X86::BI__builtin_ia32_cmpordps: i = 7; name = "cmpordps"; break; + case X86::BI__builtin_ia32_cmpgtps: + ShouldSwap = true; + i = 1; + name = "cmpgtps"; + break; + case X86::BI__builtin_ia32_cmpgeps: + i = 2; + name = "cmpgeps"; + ShouldSwap = true; + break; + case X86::BI__builtin_ia32_cmpngtps: + i = 5; + name = "cmpngtps"; + ShouldSwap = true; + break; + case X86::BI__builtin_ia32_cmpngeps: + i = 6; + name = "cmpngeps"; + ShouldSwap = true; + break; + } + + if (ShouldSwap) + std::swap(Ops[0], Ops[1]); + + Ops.push_back(llvm::ConstantInt::get(llvm::Type::Int8Ty, i)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); + } + case X86::BI__builtin_ia32_movss: + return EmitShuffleVector(Ops[0], Ops[1], 4, 1, 2, 3, "movss"); + case X86::BI__builtin_ia32_shufps: + unsigned i = cast<ConstantInt>(Ops[2])->getZExtValue(); + return EmitShuffleVector(Ops[0], Ops[1], + i & 0x3, (i & 0xc) >> 2, + ((i & 0x30) >> 4) + 4, + ((i & 0x60) >> 6) + 4, "shufps"); + } +} + +Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch 
(BuiltinID) { + default: return 0; + } +} diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp new file mode 100644 index 00000000000..c80cecc76ee --- /dev/null +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -0,0 +1,163 @@ +//===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Decl nodes as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Type.h" +using namespace clang; +using namespace CodeGen; + + +void CodeGenFunction::EmitDecl(const Decl &D) { + switch (D.getKind()) { + default: assert(0 && "Unknown decl kind!"); + case Decl::FileVar: + assert(0 && "Should not see file-scope variables inside a function!"); + case Decl::ParmVar: + assert(0 && "Parmdecls should not be in declstmts!"); + case Decl::Typedef: // typedef int X; + case Decl::Function: // void X(); + case Decl::Struct: // struct X; + case Decl::Union: // union X; + case Decl::Class: // class X; + case Decl::Enum: // enum X; + // None of these decls require codegen support. + return; + + case Decl::BlockVar: + return EmitBlockVarDecl(cast<BlockVarDecl>(D)); + case Decl::EnumConstant: + return EmitEnumConstantDecl(cast<EnumConstantDecl>(D)); + } +} + +void CodeGenFunction::EmitEnumConstantDecl(const EnumConstantDecl &D) { + assert(0 && "FIXME: Enum constant decls not implemented yet!"); +} + +/// EmitBlockVarDecl - This method handles emission of any variable declaration +/// inside a function, including static vars etc. 
+void CodeGenFunction::EmitBlockVarDecl(const BlockVarDecl &D) { + switch (D.getStorageClass()) { + case VarDecl::Static: + return EmitStaticBlockVarDecl(D); + case VarDecl::Extern: + // Don't emit it now, allow it to be emitted lazily on its first use. + return; + default: + assert((D.getStorageClass() == VarDecl::None || + D.getStorageClass() == VarDecl::Auto || + D.getStorageClass() == VarDecl::Register) && + "Unknown storage class"); + return EmitLocalBlockVarDecl(D); + } +} + +void CodeGenFunction::EmitStaticBlockVarDecl(const BlockVarDecl &D) { + QualType Ty = D.getCanonicalType(); + assert(Ty->isConstantSizeType() && "VLAs can't be static"); + + llvm::Value *&DMEntry = LocalDeclMap[&D]; + assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + + const llvm::Type *LTy = CGM.getTypes().ConvertTypeForMem(Ty); + llvm::Constant *Init = 0; + if (D.getInit() == 0) { + Init = llvm::Constant::getNullValue(LTy); + } else { + Init = CGM.EmitConstantExpr(D.getInit(), this); + } + + assert(Init && "Unable to create initialiser for static decl"); + + std::string ContextName; + if (CurFuncDecl) + ContextName = CurFuncDecl->getName(); + else + assert(0 && "Unknown context for block var decl"); // FIXME Handle objc. + + DMEntry = + new llvm::GlobalVariable(LTy, false, + llvm::GlobalValue::InternalLinkage, + Init, ContextName + "." + D.getName(), + &CGM.getModule(), 0, + Ty.getAddressSpace()); + +} + +/// EmitLocalBlockVarDecl - Emit code and set up an entry in LocalDeclMap for a +/// variable declaration with auto, register, or no storage class specifier. +/// These turn into simple stack objects. +void CodeGenFunction::EmitLocalBlockVarDecl(const BlockVarDecl &D) { + QualType Ty = D.getCanonicalType(); + + llvm::Value *DeclPtr; + if (Ty->isConstantSizeType()) { + // A normal fixed sized variable becomes an alloca in the entry block. 
+ const llvm::Type *LTy = ConvertType(Ty); + // TODO: Alignment + DeclPtr = CreateTempAlloca(LTy, D.getName()); + } else { + // TODO: Create a dynamic alloca. + assert(0 && "FIXME: Local VLAs not implemented yet"); + } + + llvm::Value *&DMEntry = LocalDeclMap[&D]; + assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + DMEntry = DeclPtr; + + // If this local has an initializer, emit it now. + if (const Expr *Init = D.getInit()) { + if (!hasAggregateLLVMType(Init->getType())) { + llvm::Value *V = EmitScalarExpr(Init); + Builder.CreateStore(V, DeclPtr, D.getType().isVolatileQualified()); + } else if (Init->getType()->isComplexType()) { + EmitComplexExprIntoAddr(Init, DeclPtr, D.getType().isVolatileQualified()); + } else { + EmitAggExpr(Init, DeclPtr, D.getType().isVolatileQualified()); + } + } +} + +/// Emit an alloca for the specified parameter and set up LocalDeclMap. +void CodeGenFunction::EmitParmDecl(const ParmVarDecl &D, llvm::Value *Arg) { + QualType Ty = D.getCanonicalType(); + + llvm::Value *DeclPtr; + if (!Ty->isConstantSizeType()) { + // Variable sized values always are passed by-reference. + DeclPtr = Arg; + } else { + // A fixed sized first class variable becomes an alloca in the entry block. + const llvm::Type *LTy = ConvertType(Ty); + if (LTy->isFirstClassType()) { + // TODO: Alignment + DeclPtr = new llvm::AllocaInst(LTy, 0, std::string(D.getName())+".addr", + AllocaInsertPt); + + // Store the initial value into the alloca. + Builder.CreateStore(Arg, DeclPtr); + } else { + // Otherwise, if this is an aggregate, just use the input pointer. 
+ DeclPtr = Arg; + } + Arg->setName(D.getName()); + } + + llvm::Value *&DMEntry = LocalDeclMap[&D]; + assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + DMEntry = DeclPtr; +} + diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp new file mode 100644 index 00000000000..932a5c5da8b --- /dev/null +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -0,0 +1,615 @@ +//===--- CGExpr.cpp - Emit LLVM Code from Expressions ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Support/MathExtras.h" +using namespace clang; +using namespace CodeGen; + +//===--------------------------------------------------------------------===// +// Miscellaneous Helper Methods +//===--------------------------------------------------------------------===// + +/// CreateTempAlloca - This creates a alloca and inserts it into the entry +/// block. +llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(const llvm::Type *Ty, + const char *Name) { + return new llvm::AllocaInst(Ty, 0, Name, AllocaInsertPt); +} + +/// EvaluateExprAsBool - Perform the usual unary conversions on the specified +/// expression and compare the result against zero, returning an Int1Ty value. 
+llvm::Value *CodeGenFunction::EvaluateExprAsBool(const Expr *E) { + QualType BoolTy = getContext().BoolTy; + if (!E->getType()->isComplexType()) + return EmitScalarConversion(EmitScalarExpr(E), E->getType(), BoolTy); + + return EmitComplexToScalarConversion(EmitComplexExpr(E), E->getType(),BoolTy); +} + +/// EmitAnyExpr - Emit code to compute the specified expression which can have +/// any type. The result is returned as an RValue struct. If this is an +/// aggregate expression, the aggloc/agglocvolatile arguments indicate where +/// the result should be returned. +RValue CodeGenFunction::EmitAnyExpr(const Expr *E, llvm::Value *AggLoc, + bool isAggLocVolatile) { + if (!hasAggregateLLVMType(E->getType())) + return RValue::get(EmitScalarExpr(E)); + else if (E->getType()->isComplexType()) + return RValue::getComplex(EmitComplexExpr(E)); + + EmitAggExpr(E, AggLoc, isAggLocVolatile); + return RValue::getAggregate(AggLoc); +} + + +//===----------------------------------------------------------------------===// +// LValue Expression Emission +//===----------------------------------------------------------------------===// + +/// EmitLValue - Emit code to compute a designator that specifies the location +/// of the expression. +/// +/// This can return one of two things: a simple address or a bitfield +/// reference. In either case, the LLVM Value* in the LValue structure is +/// guaranteed to be an LLVM pointer type. +/// +/// If this returns a bitfield reference, nothing about the pointee type of +/// the LLVM value is known: For example, it may not be a pointer to an +/// integer. +/// +/// If this returns a normal address, and if the lvalue's C type is fixed +/// size, this method guarantees that the returned pointer type will point to +/// an LLVM type of the same size of the lvalue's type. If the lvalue has a +/// variable length type, this is not possible. 
+/// +LValue CodeGenFunction::EmitLValue(const Expr *E) { + switch (E->getStmtClass()) { + default: { + WarnUnsupported(E, "l-value expression"); + llvm::Type *Ty = llvm::PointerType::getUnqual(ConvertType(E->getType())); + return LValue::MakeAddr(llvm::UndefValue::get(Ty)); + } + + case Expr::CallExprClass: return EmitCallExprLValue(cast<CallExpr>(E)); + case Expr::DeclRefExprClass: return EmitDeclRefLValue(cast<DeclRefExpr>(E)); + case Expr::ParenExprClass:return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); + case Expr::PreDefinedExprClass: + return EmitPreDefinedLValue(cast<PreDefinedExpr>(E)); + case Expr::StringLiteralClass: + return EmitStringLiteralLValue(cast<StringLiteral>(E)); + + case Expr::UnaryOperatorClass: + return EmitUnaryOpLValue(cast<UnaryOperator>(E)); + case Expr::ArraySubscriptExprClass: + return EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E)); + case Expr::OCUVectorElementExprClass: + return EmitOCUVectorElementExpr(cast<OCUVectorElementExpr>(E)); + case Expr::MemberExprClass: return EmitMemberExpr(cast<MemberExpr>(E)); + } +} + +/// EmitLoadOfLValue - Given an expression that represents a value lvalue, +/// this method emits the address of the lvalue, then loads the result as an +/// rvalue, returning the rvalue. +RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, QualType ExprType) { + if (LV.isSimple()) { + llvm::Value *Ptr = LV.getAddress(); + const llvm::Type *EltTy = + cast<llvm::PointerType>(Ptr->getType())->getElementType(); + + // Simple scalar l-value. + if (EltTy->isFirstClassType()) { + llvm::Value *V = Builder.CreateLoad(Ptr, "tmp"); + + // Bool can have different representation in memory than in registers. 
+ if (ExprType->isBooleanType()) { + if (V->getType() != llvm::Type::Int1Ty) + V = Builder.CreateTrunc(V, llvm::Type::Int1Ty, "tobool"); + } + + return RValue::get(V); + } + + assert(ExprType->isFunctionType() && "Unknown scalar value"); + return RValue::get(Ptr); + } + + if (LV.isVectorElt()) { + llvm::Value *Vec = Builder.CreateLoad(LV.getVectorAddr(), "tmp"); + return RValue::get(Builder.CreateExtractElement(Vec, LV.getVectorIdx(), + "vecext")); + } + + // If this is a reference to a subset of the elements of a vector, either + // shuffle the input or extract/insert them as appropriate. + if (LV.isOCUVectorElt()) + return EmitLoadOfOCUElementLValue(LV, ExprType); + + if (LV.isBitfield()) + return EmitLoadOfBitfieldLValue(LV, ExprType); + + assert(0 && "Unknown LValue type!"); + //an invalid RValue, but the assert will + //ensure that this point is never reached + return RValue(); +} + +RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, + QualType ExprType) { + llvm::Value *Ptr = LV.getBitfieldAddr(); + const llvm::Type *EltTy = + cast<llvm::PointerType>(Ptr->getType())->getElementType(); + unsigned EltTySize = EltTy->getPrimitiveSizeInBits(); + unsigned short BitfieldSize = LV.getBitfieldSize(); + unsigned short EndBit = LV.getBitfieldStartBit() + BitfieldSize; + + llvm::Value *V = Builder.CreateLoad(Ptr, "tmp"); + + llvm::Value *ShAmt = llvm::ConstantInt::get(EltTy, EltTySize - EndBit); + V = Builder.CreateShl(V, ShAmt, "tmp"); + + ShAmt = llvm::ConstantInt::get(EltTy, EltTySize - BitfieldSize); + V = LV.isBitfieldSigned() ? + Builder.CreateAShr(V, ShAmt, "tmp") : + Builder.CreateLShr(V, ShAmt, "tmp"); + return RValue::get(V); +} + +// If this is a reference to a subset of the elements of a vector, either +// shuffle the input or extract/insert them as appropriate. 
+RValue CodeGenFunction::EmitLoadOfOCUElementLValue(LValue LV, + QualType ExprType) { + llvm::Value *Vec = Builder.CreateLoad(LV.getOCUVectorAddr(), "tmp"); + + unsigned EncFields = LV.getOCUVectorElts(); + + // If the result of the expression is a non-vector type, we must be + // extracting a single element. Just codegen as an extractelement. + const VectorType *ExprVT = ExprType->getAsVectorType(); + if (!ExprVT) { + unsigned InIdx = OCUVectorElementExpr::getAccessedFieldNo(0, EncFields); + llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx); + return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp")); + } + + // If the source and destination have the same number of elements, use a + // vector shuffle instead of insert/extracts. + unsigned NumResultElts = ExprVT->getNumElements(); + unsigned NumSourceElts = + cast<llvm::VectorType>(Vec->getType())->getNumElements(); + + if (NumResultElts == NumSourceElts) { + llvm::SmallVector<llvm::Constant*, 4> Mask; + for (unsigned i = 0; i != NumResultElts; ++i) { + unsigned InIdx = OCUVectorElementExpr::getAccessedFieldNo(i, EncFields); + Mask.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx)); + } + + llvm::Value *MaskV = llvm::ConstantVector::get(&Mask[0], Mask.size()); + Vec = Builder.CreateShuffleVector(Vec, + llvm::UndefValue::get(Vec->getType()), + MaskV, "tmp"); + return RValue::get(Vec); + } + + // Start out with an undef of the result type. + llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType)); + + // Extract/Insert each element of the result. 
+ for (unsigned i = 0; i != NumResultElts; ++i) { + unsigned InIdx = OCUVectorElementExpr::getAccessedFieldNo(i, EncFields); + llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx); + Elt = Builder.CreateExtractElement(Vec, Elt, "tmp"); + + llvm::Value *OutIdx = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + Result = Builder.CreateInsertElement(Result, Elt, OutIdx, "tmp"); + } + + return RValue::get(Result); +} + + + +/// EmitStoreThroughLValue - Store the specified rvalue into the specified +/// lvalue, where both are guaranteed to the have the same type, and that type +/// is 'Ty'. +void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, + QualType Ty) { + if (!Dst.isSimple()) { + if (Dst.isVectorElt()) { + // Read/modify/write the vector, inserting the new element. + // FIXME: Volatility. + llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddr(), "tmp"); + Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(), + Dst.getVectorIdx(), "vecins"); + Builder.CreateStore(Vec, Dst.getVectorAddr()); + return; + } + + // If this is an update of elements of a vector, insert them as appropriate. + if (Dst.isOCUVectorElt()) + return EmitStoreThroughOCUComponentLValue(Src, Dst, Ty); + + if (Dst.isBitfield()) + return EmitStoreThroughBitfieldLValue(Src, Dst, Ty); + + assert(0 && "Unknown LValue type"); + } + + llvm::Value *DstAddr = Dst.getAddress(); + assert(Src.isScalar() && "Can't emit an agg store with this method"); + // FIXME: Handle volatility etc. 
+ const llvm::Type *SrcTy = Src.getScalarVal()->getType(); + const llvm::PointerType *DstPtr = cast<llvm::PointerType>(DstAddr->getType()); + const llvm::Type *AddrTy = DstPtr->getElementType(); + unsigned AS = DstPtr->getAddressSpace(); + + if (AddrTy != SrcTy) + DstAddr = Builder.CreateBitCast(DstAddr, + llvm::PointerType::get(SrcTy, AS), + "storetmp"); + Builder.CreateStore(Src.getScalarVal(), DstAddr); +} + +void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, + QualType Ty) { + unsigned short StartBit = Dst.getBitfieldStartBit(); + unsigned short BitfieldSize = Dst.getBitfieldSize(); + llvm::Value *Ptr = Dst.getBitfieldAddr(); + const llvm::Type *EltTy = + cast<llvm::PointerType>(Ptr->getType())->getElementType(); + unsigned EltTySize = EltTy->getPrimitiveSizeInBits(); + + llvm::Value *NewVal = Src.getScalarVal(); + llvm::Value *OldVal = Builder.CreateLoad(Ptr, "tmp"); + + llvm::Value *ShAmt = llvm::ConstantInt::get(EltTy, StartBit); + NewVal = Builder.CreateShl(NewVal, ShAmt, "tmp"); + + llvm::Constant *Mask = llvm::ConstantInt::get( + llvm::APInt::getBitsSet(EltTySize, StartBit, + StartBit + BitfieldSize)); + + // Mask out any bits that shouldn't be set in the result. + NewVal = Builder.CreateAnd(NewVal, Mask, "tmp"); + + // Next, mask out the bits this bit-field should include from the old value. + Mask = llvm::ConstantExpr::getNot(Mask); + OldVal = Builder.CreateAnd(OldVal, Mask, "tmp"); + + // Finally, merge the two together and store it. + NewVal = Builder.CreateOr(OldVal, NewVal, "tmp"); + + Builder.CreateStore(NewVal, Ptr); +} + +void CodeGenFunction::EmitStoreThroughOCUComponentLValue(RValue Src, LValue Dst, + QualType Ty) { + // This access turns into a read/modify/write of the vector. Load the input + // value now. + llvm::Value *Vec = Builder.CreateLoad(Dst.getOCUVectorAddr(), "tmp"); + // FIXME: Volatility. 
+ unsigned EncFields = Dst.getOCUVectorElts(); + + llvm::Value *SrcVal = Src.getScalarVal(); + + if (const VectorType *VTy = Ty->getAsVectorType()) { + unsigned NumSrcElts = VTy->getNumElements(); + + // Extract/Insert each element. + for (unsigned i = 0; i != NumSrcElts; ++i) { + llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + Elt = Builder.CreateExtractElement(SrcVal, Elt, "tmp"); + + unsigned Idx = OCUVectorElementExpr::getAccessedFieldNo(i, EncFields); + llvm::Value *OutIdx = llvm::ConstantInt::get(llvm::Type::Int32Ty, Idx); + Vec = Builder.CreateInsertElement(Vec, Elt, OutIdx, "tmp"); + } + } else { + // If the Src is a scalar (not a vector) it must be updating one element. + unsigned InIdx = OCUVectorElementExpr::getAccessedFieldNo(0, EncFields); + llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx); + Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt, "tmp"); + } + + Builder.CreateStore(Vec, Dst.getOCUVectorAddr()); +} + + +LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { + const ValueDecl *D = E->getDecl(); + if (isa<BlockVarDecl>(D) || isa<ParmVarDecl>(D)) { + const VarDecl *VD = cast<VarDecl>(D); + if (VD->getStorageClass() == VarDecl::Extern) + return LValue::MakeAddr(CGM.GetAddrOfGlobalVar(VD, false)); + else { + llvm::Value *V = LocalDeclMap[D]; + assert(V && "BlockVarDecl not entered in LocalDeclMap?"); + return LValue::MakeAddr(V); + } + } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + return LValue::MakeAddr(CGM.GetAddrOfFunctionDecl(FD, false)); + } else if (const FileVarDecl *FVD = dyn_cast<FileVarDecl>(D)) { + return LValue::MakeAddr(CGM.GetAddrOfGlobalVar(FVD, false)); + } + assert(0 && "Unimp declref"); + //an invalid LValue, but the assert will + //ensure that this point is never reached. + return LValue(); +} + +LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { + // __extension__ doesn't affect lvalue-ness. 
+ if (E->getOpcode() == UnaryOperator::Extension) + return EmitLValue(E->getSubExpr()); + + switch (E->getOpcode()) { + default: assert(0 && "Unknown unary operator lvalue!"); + case UnaryOperator::Deref: + return LValue::MakeAddr(EmitScalarExpr(E->getSubExpr())); + case UnaryOperator::Real: + case UnaryOperator::Imag: + LValue LV = EmitLValue(E->getSubExpr()); + + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, + E->getOpcode() == UnaryOperator::Imag); + llvm::Value *Ops[] = {Zero, Idx}; + return LValue::MakeAddr(Builder.CreateGEP(LV.getAddress(), Ops, Ops+2, + "idx")); + } +} + +LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) { + assert(!E->isWide() && "FIXME: Wide strings not supported yet!"); + const char *StrData = E->getStrData(); + unsigned Len = E->getByteLength(); + std::string StringLiteral(StrData, StrData+Len); + return LValue::MakeAddr(CGM.GetAddrOfConstantString(StringLiteral)); +} + +LValue CodeGenFunction::EmitPreDefinedLValue(const PreDefinedExpr *E) { + std::string FunctionName(CurFuncDecl->getName()); + std::string GlobalVarName; + + switch (E->getIdentType()) { + default: + assert(0 && "unknown pre-defined ident type"); + case PreDefinedExpr::Func: + GlobalVarName = "__func__."; + break; + case PreDefinedExpr::Function: + GlobalVarName = "__FUNCTION__."; + break; + case PreDefinedExpr::PrettyFunction: + // FIXME:: Demangle C++ method names + GlobalVarName = "__PRETTY_FUNCTION__."; + break; + } + + GlobalVarName += CurFuncDecl->getName(); + + // FIXME: Can cache/reuse these within the module. + llvm::Constant *C=llvm::ConstantArray::get(FunctionName); + + // Create a global variable for this. 
+ C = new llvm::GlobalVariable(C->getType(), true, + llvm::GlobalValue::InternalLinkage, + C, GlobalVarName, CurFn->getParent()); + return LValue::MakeAddr(C); +} + +LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { + // The index must always be an integer, which is not an aggregate. Emit it. + llvm::Value *Idx = EmitScalarExpr(E->getIdx()); + + // If the base is a vector type, then we are forming a vector element lvalue + // with this subscript. + if (E->getLHS()->getType()->isVectorType()) { + // Emit the vector as an lvalue to get its address. + LValue LHS = EmitLValue(E->getLHS()); + assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); + // FIXME: This should properly sign/zero/extend or truncate Idx to i32. + return LValue::MakeVectorElt(LHS.getAddress(), Idx); + } + + // The base must be a pointer, which is not an aggregate. Emit it. + llvm::Value *Base = EmitScalarExpr(E->getBase()); + + // Extend or truncate the index type to 32 or 64-bits. + QualType IdxTy = E->getIdx()->getType(); + bool IdxSigned = IdxTy->isSignedIntegerType(); + unsigned IdxBitwidth = cast<llvm::IntegerType>(Idx->getType())->getBitWidth(); + if (IdxBitwidth != LLVMPointerWidth) + Idx = Builder.CreateIntCast(Idx, llvm::IntegerType::get(LLVMPointerWidth), + IdxSigned, "idxprom"); + + // We know that the pointer points to a type of the correct size, unless the + // size is a VLA. + if (!E->getType()->isConstantSizeType()) + assert(0 && "VLA idx not implemented"); + return LValue::MakeAddr(Builder.CreateGEP(Base, Idx, "arrayidx")); +} + +LValue CodeGenFunction:: +EmitOCUVectorElementExpr(const OCUVectorElementExpr *E) { + // Emit the base vector as an l-value. 
+ LValue Base = EmitLValue(E->getBase()); + assert(Base.isSimple() && "Can only subscript lvalue vectors here!"); + + return LValue::MakeOCUVectorElt(Base.getAddress(), + E->getEncodedElementAccess()); +} + +LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { + bool isUnion = false; + Expr *BaseExpr = E->getBase(); + llvm::Value *BaseValue = NULL; + + // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. + if (E->isArrow()) { + BaseValue = EmitScalarExpr(BaseExpr); + const PointerType *PTy = + cast<PointerType>(BaseExpr->getType().getCanonicalType()); + if (PTy->getPointeeType()->isUnionType()) + isUnion = true; + } + else { + LValue BaseLV = EmitLValue(BaseExpr); + // FIXME: this isn't right for bitfields. + BaseValue = BaseLV.getAddress(); + if (BaseExpr->getType()->isUnionType()) + isUnion = true; + } + + FieldDecl *Field = E->getMemberDecl(); + return EmitLValueForField(BaseValue, Field, isUnion); +} + +LValue CodeGenFunction::EmitLValueForField(llvm::Value* BaseValue, + FieldDecl* Field, + bool isUnion) +{ + llvm::Value *V; + unsigned idx = CGM.getTypes().getLLVMFieldNo(Field); + + if (Field->isBitField()) { + const llvm::Type * FieldTy = ConvertType(Field->getType()); + const llvm::PointerType * BaseTy = + cast<llvm::PointerType>(BaseValue->getType()); + unsigned AS = BaseTy->getAddressSpace(); + BaseValue = Builder.CreateBitCast(BaseValue, + llvm::PointerType::get(FieldTy, AS), + "tmp"); + V = Builder.CreateGEP(BaseValue, + llvm::ConstantInt::get(llvm::Type::Int32Ty, idx), + "tmp"); + } else { + llvm::Value *Idxs[2] = { llvm::Constant::getNullValue(llvm::Type::Int32Ty), + llvm::ConstantInt::get(llvm::Type::Int32Ty, idx) }; + V = Builder.CreateGEP(BaseValue,Idxs, Idxs + 2, "tmp"); + } + // Match union field type. 
+ if (isUnion) { + const llvm::Type * FieldTy = ConvertType(Field->getType()); + const llvm::PointerType * BaseTy = + cast<llvm::PointerType>(BaseValue->getType()); + if (FieldTy != BaseTy->getElementType()) { + unsigned AS = BaseTy->getAddressSpace(); + V = Builder.CreateBitCast(V, + llvm::PointerType::get(FieldTy, AS), + "tmp"); + } + } + + if (Field->isBitField()) { + CodeGenTypes::BitFieldInfo bitFieldInfo = + CGM.getTypes().getBitFieldInfo(Field); + return LValue::MakeBitfield(V, bitFieldInfo.Begin, bitFieldInfo.Size, + Field->getType()->isSignedIntegerType()); + } else + return LValue::MakeAddr(V); +} + +//===--------------------------------------------------------------------===// +// Expression Emission +//===--------------------------------------------------------------------===// + + +RValue CodeGenFunction::EmitCallExpr(const CallExpr *E) { + if (const ImplicitCastExpr *IcExpr = + dyn_cast<const ImplicitCastExpr>(E->getCallee())) + if (const DeclRefExpr *DRExpr = + dyn_cast<const DeclRefExpr>(IcExpr->getSubExpr())) + if (const FunctionDecl *FDecl = + dyn_cast<const FunctionDecl>(DRExpr->getDecl())) + if (unsigned builtinID = FDecl->getIdentifier()->getBuiltinID()) + return EmitBuiltinExpr(builtinID, E); + + llvm::Value *Callee = EmitScalarExpr(E->getCallee()); + return EmitCallExpr(Callee, E->getCallee()->getType(), + E->arg_begin(), E->getNumArgs()); +} + +RValue CodeGenFunction::EmitCallExpr(Expr *FnExpr, Expr *const *Args, + unsigned NumArgs) { + llvm::Value *Callee = EmitScalarExpr(FnExpr); + return EmitCallExpr(Callee, FnExpr->getType(), Args, NumArgs); +} + +LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) { + // Can only get l-value for call expression returning aggregate type + RValue RV = EmitCallExpr(E); + return LValue::MakeAddr(RV.getAggregateAddr()); +} + +RValue CodeGenFunction::EmitCallExpr(llvm::Value *Callee, QualType FnType, + Expr *const *ArgExprs, unsigned NumArgs) { + // The callee type will always be a pointer to 
function type, get the function + // type. + FnType = cast<PointerType>(FnType.getCanonicalType())->getPointeeType(); + QualType ResultType = cast<FunctionType>(FnType)->getResultType(); + + llvm::SmallVector<llvm::Value*, 16> Args; + + // Handle struct-return functions by passing a pointer to the location that + // we would like to return into. + if (hasAggregateLLVMType(ResultType)) { + // Create a temporary alloca to hold the result of the call. :( + Args.push_back(CreateTempAlloca(ConvertType(ResultType))); + // FIXME: set the stret attribute on the argument. + } + + for (unsigned i = 0, e = NumArgs; i != e; ++i) { + QualType ArgTy = ArgExprs[i]->getType(); + + if (!hasAggregateLLVMType(ArgTy)) { + // Scalar argument is passed by-value. + Args.push_back(EmitScalarExpr(ArgExprs[i])); + } else if (ArgTy->isComplexType()) { + // Make a temporary alloca to pass the argument. + llvm::Value *DestMem = CreateTempAlloca(ConvertType(ArgTy)); + EmitComplexExprIntoAddr(ArgExprs[i], DestMem, false); + Args.push_back(DestMem); + } else { + llvm::Value *DestMem = CreateTempAlloca(ConvertType(ArgTy)); + EmitAggExpr(ArgExprs[i], DestMem, false); + Args.push_back(DestMem); + } + } + + llvm::CallInst *CI = Builder.CreateCall(Callee,&Args[0],&Args[0]+Args.size()); + if (const llvm::Function *F = dyn_cast<llvm::Function>(Callee)) + CI->setCallingConv(F->getCallingConv()); + if (CI->getType() != llvm::Type::VoidTy) + CI->setName("call"); + else if (ResultType->isComplexType()) + return RValue::getComplex(LoadComplexFromAddr(Args[0], false)); + else if (hasAggregateLLVMType(ResultType)) + // Struct return. + return RValue::getAggregate(Args[0]); + else { + // void return. 
+ assert(ResultType->isVoidType() && "Should only have a void expr here"); + CI = 0; + } + + return RValue::get(CI); +} diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp new file mode 100644 index 00000000000..325ac2109eb --- /dev/null +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -0,0 +1,337 @@ +//===--- CGExprAgg.cpp - Emit LLVM Code from Aggregate Expressions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Aggregate Expr nodes as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Support/Compiler.h" +using namespace clang; +using namespace CodeGen; + +//===----------------------------------------------------------------------===// +// Aggregate Expression Emitter +//===----------------------------------------------------------------------===// + +namespace { +class VISIBILITY_HIDDEN AggExprEmitter : public StmtVisitor<AggExprEmitter> { + CodeGenFunction &CGF; + llvm::LLVMFoldingBuilder &Builder; + llvm::Value *DestPtr; + bool VolatileDest; +public: + AggExprEmitter(CodeGenFunction &cgf, llvm::Value *destPtr, bool volatileDest) + : CGF(cgf), Builder(CGF.Builder), + DestPtr(destPtr), VolatileDest(volatileDest) { + } + + //===--------------------------------------------------------------------===// + // Utilities + //===--------------------------------------------------------------------===// + + /// EmitAggLoadOfLValue - Given an expression with aggregate type that + /// represents a value lvalue, this method emits the address of the lvalue, + /// then loads 
the result into DestPtr. + void EmitAggLoadOfLValue(const Expr *E); + + void EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, + QualType EltTy); + + void EmitAggregateClear(llvm::Value *DestPtr, QualType Ty); + + void EmitNonConstInit(InitListExpr *E); + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + void VisitStmt(Stmt *S) { + CGF.WarnUnsupported(S, "aggregate expression"); + } + void VisitParenExpr(ParenExpr *PE) { Visit(PE->getSubExpr()); } + + // l-values. + void VisitDeclRefExpr(DeclRefExpr *DRE) { EmitAggLoadOfLValue(DRE); } + void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); } + void VisitUnaryDeref(UnaryOperator *E) { EmitAggLoadOfLValue(E); } + void VisitStringLiteral(StringLiteral *E) { EmitAggLoadOfLValue(E); } + + void VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + EmitAggLoadOfLValue(E); + } + + // Operators. + // case Expr::UnaryOperatorClass: + // case Expr::CastExprClass: + void VisitImplicitCastExpr(ImplicitCastExpr *E); + void VisitCallExpr(const CallExpr *E); + void VisitStmtExpr(const StmtExpr *E); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitBinAssign(const BinaryOperator *E); + void VisitOverloadExpr(const OverloadExpr *E); + + + void VisitConditionalOperator(const ConditionalOperator *CO); + void VisitInitListExpr(InitListExpr *E); + // case Expr::ChooseExprClass: + +}; +} // end anonymous namespace. + +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + +void AggExprEmitter::EmitAggregateClear(llvm::Value *DestPtr, QualType Ty) { + assert(!Ty->isComplexType() && "Shouldn't happen for complex"); + + // Aggregate assignment turns into llvm.memset. 
+ const llvm::Type *BP = llvm::PointerType::getUnqual(llvm::Type::Int8Ty); + if (DestPtr->getType() != BP) + DestPtr = Builder.CreateBitCast(DestPtr, BP, "tmp"); + + // Get size and alignment info for this aggregate. + std::pair<uint64_t, unsigned> TypeInfo = CGF.getContext().getTypeInfo(Ty); + + // FIXME: Handle variable sized types. + const llvm::Type *IntPtr = llvm::IntegerType::get(CGF.LLVMPointerWidth); + + llvm::Value *MemSetOps[4] = { + DestPtr, + llvm::ConstantInt::getNullValue(llvm::Type::Int8Ty), + // TypeInfo.first describes size in bits. + llvm::ConstantInt::get(IntPtr, TypeInfo.first/8), + llvm::ConstantInt::get(llvm::Type::Int32Ty, TypeInfo.second/8) + }; + + Builder.CreateCall(CGF.CGM.getMemSetFn(), MemSetOps, MemSetOps+4); +} + +void AggExprEmitter::EmitAggregateCopy(llvm::Value *DestPtr, + llvm::Value *SrcPtr, QualType Ty) { + assert(!Ty->isComplexType() && "Shouldn't happen for complex"); + + // Aggregate assignment turns into llvm.memcpy. + const llvm::Type *BP = llvm::PointerType::getUnqual(llvm::Type::Int8Ty); + if (DestPtr->getType() != BP) + DestPtr = Builder.CreateBitCast(DestPtr, BP, "tmp"); + if (SrcPtr->getType() != BP) + SrcPtr = Builder.CreateBitCast(SrcPtr, BP, "tmp"); + + // Get size and alignment info for this aggregate. + std::pair<uint64_t, unsigned> TypeInfo = CGF.getContext().getTypeInfo(Ty); + + // FIXME: Handle variable sized types. + const llvm::Type *IntPtr = llvm::IntegerType::get(CGF.LLVMPointerWidth); + + llvm::Value *MemCpyOps[4] = { + DestPtr, SrcPtr, + // TypeInfo.first describes size in bits. + llvm::ConstantInt::get(IntPtr, TypeInfo.first/8), + llvm::ConstantInt::get(llvm::Type::Int32Ty, TypeInfo.second/8) + }; + + Builder.CreateCall(CGF.CGM.getMemCpyFn(), MemCpyOps, MemCpyOps+4); +} + + +/// EmitAggLoadOfLValue - Given an expression with aggregate type that +/// represents a value lvalue, this method emits the address of the lvalue, +/// then loads the result into DestPtr. 
+void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) { + LValue LV = CGF.EmitLValue(E); + assert(LV.isSimple() && "Can't have aggregate bitfield, vector, etc"); + llvm::Value *SrcPtr = LV.getAddress(); + + // If the result is ignored, don't copy from the value. + if (DestPtr == 0) + // FIXME: If the source is volatile, we must read from it. + return; + + EmitAggregateCopy(DestPtr, SrcPtr, E->getType()); +} + +//===----------------------------------------------------------------------===// +// Visitor Methods +//===----------------------------------------------------------------------===// + +void AggExprEmitter::VisitImplicitCastExpr(ImplicitCastExpr *E) +{ + QualType STy = E->getSubExpr()->getType().getCanonicalType(); + QualType Ty = E->getType().getCanonicalType(); + + assert(CGF.getContext().typesAreCompatible( + STy.getUnqualifiedType(), Ty.getUnqualifiedType()) + && "Implicit cast types must be compatible"); + + Visit(E->getSubExpr()); +} + +void AggExprEmitter::VisitCallExpr(const CallExpr *E) +{ + RValue RV = CGF.EmitCallExpr(E); + assert(RV.isAggregate() && "Return value must be aggregate value!"); + + // If the result is ignored, don't copy from the value. + if (DestPtr == 0) + // FIXME: If the source is volatile, we must read from it. + return; + + EmitAggregateCopy(DestPtr, RV.getAggregateAddr(), E->getType()); +} + +void AggExprEmitter::VisitOverloadExpr(const OverloadExpr *E) +{ + RValue RV = CGF.EmitCallExpr(E->getFn(), E->arg_begin(), + E->getNumArgs(CGF.getContext())); + assert(RV.isAggregate() && "Return value must be aggregate value!"); + + // If the result is ignored, don't copy from the value. + if (DestPtr == 0) + // FIXME: If the source is volatile, we must read from it. 
+ return; + + EmitAggregateCopy(DestPtr, RV.getAggregateAddr(), E->getType()); +} + +void AggExprEmitter::VisitStmtExpr(const StmtExpr *E) { + CGF.EmitCompoundStmt(*E->getSubStmt(), true, DestPtr, VolatileDest); +} + +void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) { + CGF.WarnUnsupported(E, "aggregate binary expression"); +} + +void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { + // For an assignment to work, the value on the right has + // to be compatible with the value on the left. + assert(CGF.getContext().typesAreCompatible( + E->getLHS()->getType().getUnqualifiedType(), + E->getRHS()->getType().getUnqualifiedType()) + && "Invalid assignment"); + LValue LHS = CGF.EmitLValue(E->getLHS()); + + // Codegen the RHS so that it stores directly into the LHS. + CGF.EmitAggExpr(E->getRHS(), LHS.getAddress(), false /*FIXME: VOLATILE LHS*/); + + if (DestPtr == 0) + return; + + // If the result of the assignment is used, copy the RHS there also. + EmitAggregateCopy(DestPtr, LHS.getAddress(), E->getType()); +} + +void AggExprEmitter::VisitConditionalOperator(const ConditionalOperator *E) { + llvm::BasicBlock *LHSBlock = new llvm::BasicBlock("cond.?"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("cond.:"); + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("cond.cont"); + + llvm::Value *Cond = CGF.EvaluateExprAsBool(E->getCond()); + Builder.CreateCondBr(Cond, LHSBlock, RHSBlock); + + CGF.EmitBlock(LHSBlock); + + // Handle the GNU extension for missing LHS. 
+ assert(E->getLHS() && "Must have LHS for aggregate value"); + + Visit(E->getLHS()); + Builder.CreateBr(ContBlock); + LHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(RHSBlock); + + Visit(E->getRHS()); + Builder.CreateBr(ContBlock); + RHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(ContBlock); +} + +void AggExprEmitter::EmitNonConstInit(InitListExpr *E) { + + const llvm::PointerType *APType = + cast<llvm::PointerType>(DestPtr->getType()); + const llvm::Type *DestType = APType->getElementType(); + + if (const llvm::ArrayType *AType = dyn_cast<llvm::ArrayType>(DestType)) { + unsigned NumInitElements = E->getNumInits(); + + llvm::Value *Idxs[] = { + llvm::Constant::getNullValue(llvm::Type::Int32Ty), + NULL + }; + llvm::Value *NextVal = NULL; + unsigned i; + for (i = 0; i != NumInitElements; ++i) { + Idxs[1] = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + NextVal = Builder.CreateGEP(DestPtr, Idxs, Idxs + 2,".array"); + Expr *Init = E->getInit(i); + if (isa<InitListExpr>(Init)) + CGF.EmitAggExpr(Init, NextVal, VolatileDest); + else + Builder.CreateStore(CGF.EmitScalarExpr(Init), NextVal); + } + + // Emit remaining default initializers + unsigned NumArrayElements = AType->getNumElements(); + QualType QType = E->getInit(0)->getType(); + const llvm::Type *EType = AType->getElementType(); + for (/*Do not initialize i*/; i < NumArrayElements; ++i) { + Idxs[1] = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + NextVal = Builder.CreateGEP(DestPtr, Idxs, Idxs + 2,".array"); + if (EType->isFirstClassType()) + Builder.CreateStore(llvm::Constant::getNullValue(EType), NextVal); + else + EmitAggregateClear(NextVal, QType); + } + } else + assert(false && "Invalid initializer"); +} + +void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { + + if (E->isConstantExpr(CGF.CGM.getContext(), NULL)) { + llvm::Constant *V = CGF.CGM.EmitConstantExpr(E); + // Create global value to hold this array. 
+ V = new llvm::GlobalVariable(V->getType(), true, + llvm::GlobalValue::InternalLinkage, + V, ".array", + &CGF.CGM.getModule()); + + EmitAggregateCopy(DestPtr, V , E->getType()); + return; + } else { + if (!E->getType()->isArrayType()) { + CGF.WarnUnsupported(E, "aggregate init-list expression"); + return; + } + EmitNonConstInit(E); + } +} + +//===----------------------------------------------------------------------===// +// Entry Points into this File +//===----------------------------------------------------------------------===// + +/// EmitAggExpr - Emit the computation of the specified expression of +/// aggregate type. The result is computed into DestPtr. Note that if +/// DestPtr is null, the value of the aggregate expression is not needed. +void CodeGenFunction::EmitAggExpr(const Expr *E, llvm::Value *DestPtr, + bool VolatileDest) { + assert(E && hasAggregateLLVMType(E->getType()) && + "Invalid aggregate expression to emit"); + + AggExprEmitter(*this, DestPtr, VolatileDest).Visit(const_cast<Expr*>(E)); +} diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp new file mode 100644 index 00000000000..b1de93570d9 --- /dev/null +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -0,0 +1,542 @@ +//===--- CGExprComplex.cpp - Emit LLVM Code for Complex Exprs -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes with complex types as LLVM code. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/AST.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Compiler.h"
+using namespace clang;
+using namespace CodeGen;
+
+//===----------------------------------------------------------------------===//
+//                        Complex Expression Emitter
+//===----------------------------------------------------------------------===//
+
+typedef CodeGenFunction::ComplexPairTy ComplexPairTy;
+
+namespace {
+/// ComplexExprEmitter - A StmtVisitor whose Visit* methods emit code for an
+/// expression and return its value as a ComplexPairTy.  The emitter keeps
+/// only references to the owning CodeGenFunction and to its builder.
+class VISIBILITY_HIDDEN ComplexExprEmitter
+  : public StmtVisitor<ComplexExprEmitter, ComplexPairTy> {
+  CodeGenFunction &CGF;
+  llvm::LLVMFoldingBuilder &Builder;
+public:
+  ComplexExprEmitter(CodeGenFunction &cgf) : CGF(cgf), Builder(CGF.Builder) {
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  //                               Utilities
+  //===--------------------------------------------------------------------===//
+
+  /// EmitLoadOfLValue - Given an expression with complex type that represents a
+  /// value l-value, this method emits the address of the l-value, then loads
+  /// and returns the result.  The load is always emitted as non-volatile.
+  ComplexPairTy EmitLoadOfLValue(const Expr *E) {
+    LValue LV = CGF.EmitLValue(E);
+    // FIXME: Volatile
+    return EmitLoadOfComplex(LV.getAddress(), false);
+  }
+
+  /// EmitLoadOfComplex - Given a pointer to a complex value, emit code to load
+  /// the real and imaginary pieces.
+  ComplexPairTy EmitLoadOfComplex(llvm::Value *SrcPtr, bool isVolatile);
+
+  /// EmitStoreOfComplex - Store the specified real/imag parts into the
+  /// specified value pointer.
+  void EmitStoreOfComplex(ComplexPairTy Val, llvm::Value *ResPtr, bool isVol);
+
+  /// EmitComplexToComplexCast - Emit a cast from complex value Val to DestType.
+  ComplexPairTy EmitComplexToComplexCast(ComplexPairTy Val, QualType SrcType,
+                                         QualType DestType);
+
+  //===--------------------------------------------------------------------===//
+  //                            Visitor Methods
+  //===--------------------------------------------------------------------===//
+
+  /// VisitStmt - Fallback for statements with no more specific visitor: dump
+  /// the statement and assert.
+  ComplexPairTy VisitStmt(Stmt *S) {
+    S->dump(CGF.getContext().getSourceManager());
+    assert(0 && "Stmt can't have complex result type!");
+    return ComplexPairTy();
+  }
+  /// VisitExpr - Fallback for expressions with no more specific visitor.
+  ComplexPairTy VisitExpr(Expr *S);
+  ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());}
+  ComplexPairTy VisitImaginaryLiteral(const ImaginaryLiteral *IL);
+
+  // l-values.  Each of these is emitted as an l-value and then loaded.
+  ComplexPairTy VisitDeclRefExpr(const Expr *E) { return EmitLoadOfLValue(E); }
+  ComplexPairTy VisitArraySubscriptExpr(Expr *E) { return EmitLoadOfLValue(E); }
+  ComplexPairTy VisitMemberExpr(const Expr *E) { return EmitLoadOfLValue(E); }
+
+  // FIXME: CompoundLiteralExpr
+
+  /// EmitCast - Emit Op and convert the result to the complex type DestTy.
+  /// Implicit and explicit casts both funnel through here.
+  ComplexPairTy EmitCast(Expr *Op, QualType DestTy);
+  ComplexPairTy VisitImplicitCastExpr(ImplicitCastExpr *E) {
+    // Unlike for scalars, we don't have to worry about function->ptr demotion
+    // here.
+    return EmitCast(E->getSubExpr(), E->getType());
+  }
+  ComplexPairTy VisitCastExpr(CastExpr *E) {
+    return EmitCast(E->getSubExpr(), E->getType());
+  }
+  ComplexPairTy VisitCallExpr(const CallExpr *E);
+  ComplexPairTy VisitStmtExpr(const StmtExpr *E);
+  ComplexPairTy VisitOverloadExpr(const OverloadExpr *OE);
+
+  // Operators.
+
+  /// VisitPrePostIncDec - Shared implementation of ++/--.  isInc selects
+  /// increment over decrement, isPre selects the prefix over the postfix form.
+  ComplexPairTy VisitPrePostIncDec(const UnaryOperator *E,
+                                   bool isInc, bool isPre);
+  ComplexPairTy VisitUnaryPostDec(const UnaryOperator *E) {
+    return VisitPrePostIncDec(E, false, false);
+  }
+  ComplexPairTy VisitUnaryPostInc(const UnaryOperator *E) {
+    return VisitPrePostIncDec(E, true, false);
+  }
+  ComplexPairTy VisitUnaryPreDec(const UnaryOperator *E) {
+    return VisitPrePostIncDec(E, false, true);
+  }
+  ComplexPairTy VisitUnaryPreInc(const UnaryOperator *E) {
+    return VisitPrePostIncDec(E, true, true);
+  }
+  ComplexPairTy VisitUnaryDeref(const Expr *E) { return EmitLoadOfLValue(E); }
+  // Unary plus yields the value of its operand unchanged.
+  ComplexPairTy VisitUnaryPlus     (const UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+  ComplexPairTy VisitUnaryMinus    (const UnaryOperator *E);
+  ComplexPairTy VisitUnaryNot      (const UnaryOperator *E);
+  // LNot,SizeOf,AlignOf,Real,Imag never return complex.
+  // __extension__ yields the value of its operand unchanged.
+  ComplexPairTy VisitUnaryExtension(const UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+
+  /// BinOpInfo - The two emitted operands of a binary operator together with
+  /// the type the operation is computed in.
+  struct BinOpInfo {
+    ComplexPairTy LHS;
+    ComplexPairTy RHS;
+    QualType Ty;  // Computation Type.
+  };
+
+  /// EmitBinOps - Emit both operands of E and package them as a BinOpInfo.
+  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  /// EmitCompoundAssign - Shared implementation of the compound assignment
+  /// visitors; Func is the EmitBin* member that performs the arithmetic.
+  ComplexPairTy EmitCompoundAssign(const CompoundAssignOperator *E,
+                                   ComplexPairTy (ComplexExprEmitter::*Func)
+                                   (const BinOpInfo &));
+
+  ComplexPairTy EmitBinAdd(const BinOpInfo &Op);
+  ComplexPairTy EmitBinSub(const BinOpInfo &Op);
+  ComplexPairTy EmitBinMul(const BinOpInfo &Op);
+  ComplexPairTy EmitBinDiv(const BinOpInfo &Op);
+
+  ComplexPairTy VisitBinMul(const BinaryOperator *E) {
+    return EmitBinMul(EmitBinOps(E));
+  }
+  ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
+    return EmitBinAdd(EmitBinOps(E));
+  }
+  ComplexPairTy VisitBinSub(const BinaryOperator *E) {
+    return EmitBinSub(EmitBinOps(E));
+  }
+  ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
+    return EmitBinDiv(EmitBinOps(E));
+  }
+
+  // Compound assignments.
+  ComplexPairTy VisitBinAddAssign(const CompoundAssignOperator *E) {
+    return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinAdd);
+  }
+  ComplexPairTy VisitBinSubAssign(const CompoundAssignOperator *E) {
+    return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinSub);
+  }
+  ComplexPairTy VisitBinMulAssign(const CompoundAssignOperator *E) {
+    return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinMul);
+  }
+  ComplexPairTy VisitBinDivAssign(const CompoundAssignOperator *E) {
+    return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinDiv);
+  }
+
+  // GCC rejects rem/and/or/xor for integer complex.
+  // Logical and/or always return int, never complex.
+
+  // No comparisons produce a complex result.
+  ComplexPairTy VisitBinAssign     (const BinaryOperator *E);
+  ComplexPairTy VisitBinComma      (const BinaryOperator *E);
+
+
+  ComplexPairTy VisitConditionalOperator(const ConditionalOperator *CO);
+  ComplexPairTy VisitChooseExpr(ChooseExpr *CE);
+};
+}  // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+//                                Utilities
+//===----------------------------------------------------------------------===//
+
+/// EmitLoadOfComplex - Given a pointer to a complex value, emit code to
+/// load the real and imaginary pieces, returning them as Real/Imag.
+ComplexPairTy ComplexExprEmitter::EmitLoadOfComplex(llvm::Value *SrcPtr,
+                                                    bool isVolatile) {
+  // Everything created here is named after the source pointer plus a suffix.
+  llvm::SmallString<64> ValueName(SrcPtr->getNameStart(),
+                                  SrcPtr->getNameStart()+SrcPtr->getNameLen());
+
+  // GEP indices: {0, 0} addresses the real part, {0, 1} the imaginary part.
+  llvm::Value *FieldIdx[2];
+  FieldIdx[0] = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0);
+  FieldIdx[1] = FieldIdx[0];
+
+  // Real part: "<src>.realp" is the address, "<src>.real" the loaded value.
+  ValueName += ".realp";
+  llvm::Value *RealAddr =
+    Builder.CreateGEP(SrcPtr, FieldIdx, FieldIdx+2, ValueName.c_str());
+  ValueName.pop_back();
+  llvm::Value *RealPart =
+    Builder.CreateLoad(RealAddr, isVolatile, ValueName.c_str());
+
+  // Strip "real" (keeping the '.') and switch the name over to ".imagp".
+  ValueName.resize(ValueName.size()-4);
+  ValueName += "imagp";
+
+  // Imaginary part: "<src>.imagp" is the address, "<src>.imag" the value.
+  FieldIdx[1] = llvm::ConstantInt::get(llvm::Type::Int32Ty, 1);
+  llvm::Value *ImagAddr =
+    Builder.CreateGEP(SrcPtr, FieldIdx, FieldIdx+2, ValueName.c_str());
+  ValueName.pop_back();
+  llvm::Value *ImagPart =
+    Builder.CreateLoad(ImagAddr, isVolatile, ValueName.c_str());
+
+  return ComplexPairTy(RealPart, ImagPart);
+}
+
+/// EmitStoreOfComplex - Store the specified real/imag parts into the
+/// specified value pointer.
+void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, llvm::Value *Ptr, + bool isVolatile) { + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *One = llvm::ConstantInt::get(llvm::Type::Int32Ty, 1); + + llvm::Value *Ops[] = {Zero, Zero}; + llvm::Value *RealPtr = Builder.CreateGEP(Ptr, Ops, Ops+2, "real"); + + Ops[1] = One; // { Ops = { Zero, One } + llvm::Value *ImagPtr = Builder.CreateGEP(Ptr, Ops, Ops+2, "imag"); + + Builder.CreateStore(Val.first, RealPtr, isVolatile); + Builder.CreateStore(Val.second, ImagPtr, isVolatile); +} + + + +//===----------------------------------------------------------------------===// +// Visitor Methods +//===----------------------------------------------------------------------===// + +ComplexPairTy ComplexExprEmitter::VisitExpr(Expr *E) { + CGF.WarnUnsupported(E, "complex expression"); + const llvm::Type *EltTy = + CGF.ConvertType(E->getType()->getAsComplexType()->getElementType()); + llvm::Value *U = llvm::UndefValue::get(EltTy); + return ComplexPairTy(U, U); +} + +ComplexPairTy ComplexExprEmitter:: +VisitImaginaryLiteral(const ImaginaryLiteral *IL) { + llvm::Value *Imag = CGF.EmitScalarExpr(IL->getSubExpr()); + return ComplexPairTy(llvm::Constant::getNullValue(Imag->getType()), Imag); +} + + +ComplexPairTy ComplexExprEmitter::VisitCallExpr(const CallExpr *E) { + return CGF.EmitCallExpr(E).getComplexVal(); +} + +ComplexPairTy ComplexExprEmitter::VisitOverloadExpr(const OverloadExpr *E) { + return CGF.EmitCallExpr(E->getFn(), E->arg_begin(), + E->getNumArgs(CGF.getContext())).getComplexVal(); +} + +ComplexPairTy ComplexExprEmitter::VisitStmtExpr(const StmtExpr *E) { + return CGF.EmitCompoundStmt(*E->getSubStmt(), true).getComplexVal(); +} + +/// EmitComplexToComplexCast - Emit a cast from complex value Val to DestType. +ComplexPairTy ComplexExprEmitter::EmitComplexToComplexCast(ComplexPairTy Val, + QualType SrcType, + QualType DestType) { + // Get the src/dest element type. 
+ SrcType = cast<ComplexType>(SrcType.getCanonicalType())->getElementType(); + DestType = cast<ComplexType>(DestType.getCanonicalType())->getElementType(); + + // C99 6.3.1.6: When a value of complextype is converted to another + // complex type, both the real and imaginary parts followthe conversion + // rules for the corresponding real types. + Val.first = CGF.EmitScalarConversion(Val.first, SrcType, DestType); + Val.second = CGF.EmitScalarConversion(Val.second, SrcType, DestType); + return Val; +} + +ComplexPairTy ComplexExprEmitter::EmitCast(Expr *Op, QualType DestTy) { + // Two cases here: cast from (complex to complex) and (scalar to complex). + if (Op->getType()->isComplexType()) + return EmitComplexToComplexCast(Visit(Op), Op->getType(), DestTy); + + // C99 6.3.1.7: When a value of real type is converted to a complex type, the + // real part of the complex result value is determined by the rules of + // conversion to the corresponding real type and the imaginary part of the + // complex result value is a positive zero or an unsigned zero. + llvm::Value *Elt = CGF.EmitScalarExpr(Op); + + // Convert the input element to the element type of the complex. + DestTy = cast<ComplexType>(DestTy.getCanonicalType())->getElementType(); + Elt = CGF.EmitScalarConversion(Elt, Op->getType(), DestTy); + + // Return (realval, 0). + return ComplexPairTy(Elt, llvm::Constant::getNullValue(Elt->getType())); +} + +ComplexPairTy ComplexExprEmitter::VisitPrePostIncDec(const UnaryOperator *E, + bool isInc, bool isPre) { + LValue LV = CGF.EmitLValue(E->getSubExpr()); + // FIXME: Handle volatile! + ComplexPairTy InVal = EmitLoadOfComplex(LV.getAddress(), false); + + uint64_t AmountVal = isInc ? 1 : -1; + + llvm::Value *NextVal; + if (isa<llvm::IntegerType>(InVal.first->getType())) + NextVal = llvm::ConstantInt::get(InVal.first->getType(), AmountVal); + else if (InVal.first->getType() == llvm::Type::FloatTy) + // FIXME: Handle long double. 
+ NextVal = + llvm::ConstantFP::get(InVal.first->getType(), + llvm::APFloat(static_cast<float>(AmountVal))); + else { + // FIXME: Handle long double. + assert(InVal.first->getType() == llvm::Type::DoubleTy); + NextVal = + llvm::ConstantFP::get(InVal.first->getType(), + llvm::APFloat(static_cast<double>(AmountVal))); + } + + // Add the inc/dec to the real part. + NextVal = Builder.CreateAdd(InVal.first, NextVal, isInc ? "inc" : "dec"); + + ComplexPairTy IncVal(NextVal, InVal.second); + + // Store the updated result through the lvalue. + EmitStoreOfComplex(IncVal, LV.getAddress(), false); /* FIXME: Volatile */ + + // If this is a postinc, return the value read from memory, otherwise use the + // updated value. + return isPre ? IncVal : InVal; +} + +ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { + ComplexPairTy Op = Visit(E->getSubExpr()); + llvm::Value *ResR = Builder.CreateNeg(Op.first, "neg.r"); + llvm::Value *ResI = Builder.CreateNeg(Op.second, "neg.i"); + return ComplexPairTy(ResR, ResI); +} + +ComplexPairTy ComplexExprEmitter::VisitUnaryNot(const UnaryOperator *E) { + // ~(a+ib) = a + i*-b + ComplexPairTy Op = Visit(E->getSubExpr()); + llvm::Value *ResI = Builder.CreateNeg(Op.second, "conj.i"); + return ComplexPairTy(Op.first, ResI); +} + +ComplexPairTy ComplexExprEmitter::EmitBinAdd(const BinOpInfo &Op) { + llvm::Value *ResR = Builder.CreateAdd(Op.LHS.first, Op.RHS.first, "add.r"); + llvm::Value *ResI = Builder.CreateAdd(Op.LHS.second, Op.RHS.second, "add.i"); + return ComplexPairTy(ResR, ResI); +} + +ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) { + llvm::Value *ResR = Builder.CreateSub(Op.LHS.first, Op.RHS.first, "sub.r"); + llvm::Value *ResI = Builder.CreateSub(Op.LHS.second, Op.RHS.second, "sub.i"); + return ComplexPairTy(ResR, ResI); +} + + +ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { + llvm::Value *ResRl = Builder.CreateMul(Op.LHS.first, Op.RHS.first, "mul.rl"); + llvm::Value *ResRr 
= Builder.CreateMul(Op.LHS.second, Op.RHS.second,"mul.rr"); + llvm::Value *ResR = Builder.CreateSub(ResRl, ResRr, "mul.r"); + + llvm::Value *ResIl = Builder.CreateMul(Op.LHS.second, Op.RHS.first, "mul.il"); + llvm::Value *ResIr = Builder.CreateMul(Op.LHS.first, Op.RHS.second, "mul.ir"); + llvm::Value *ResI = Builder.CreateAdd(ResIl, ResIr, "mul.i"); + return ComplexPairTy(ResR, ResI); +} + +ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { + llvm::Value *LHSr = Op.LHS.first, *LHSi = Op.LHS.second; + llvm::Value *RHSr = Op.RHS.first, *RHSi = Op.RHS.second; + + // (a+ib) / (c+id) = ((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd)) + llvm::Value *Tmp1 = Builder.CreateMul(LHSr, RHSr, "tmp"); // a*c + llvm::Value *Tmp2 = Builder.CreateMul(LHSi, RHSi, "tmp"); // b*d + llvm::Value *Tmp3 = Builder.CreateAdd(Tmp1, Tmp2, "tmp"); // ac+bd + + llvm::Value *Tmp4 = Builder.CreateMul(RHSr, RHSr, "tmp"); // c*c + llvm::Value *Tmp5 = Builder.CreateMul(RHSi, RHSi, "tmp"); // d*d + llvm::Value *Tmp6 = Builder.CreateAdd(Tmp4, Tmp5, "tmp"); // cc+dd + + llvm::Value *Tmp7 = Builder.CreateMul(LHSi, RHSr, "tmp"); // b*c + llvm::Value *Tmp8 = Builder.CreateMul(LHSr, RHSi, "tmp"); // a*d + llvm::Value *Tmp9 = Builder.CreateSub(Tmp7, Tmp8, "tmp"); // bc-ad + + llvm::Value *DSTr, *DSTi; + if (Tmp3->getType()->isFloatingPoint()) { + DSTr = Builder.CreateFDiv(Tmp3, Tmp6, "tmp"); + DSTi = Builder.CreateFDiv(Tmp9, Tmp6, "tmp"); + } else { + if (Op.Ty->getAsComplexType()->getElementType()->isUnsignedIntegerType()) { + DSTr = Builder.CreateUDiv(Tmp3, Tmp6, "tmp"); + DSTi = Builder.CreateUDiv(Tmp9, Tmp6, "tmp"); + } else { + DSTr = Builder.CreateSDiv(Tmp3, Tmp6, "tmp"); + DSTi = Builder.CreateSDiv(Tmp9, Tmp6, "tmp"); + } + } + + return ComplexPairTy(DSTr, DSTi); +} + +ComplexExprEmitter::BinOpInfo +ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) { + BinOpInfo Ops; + Ops.LHS = Visit(E->getLHS()); + Ops.RHS = Visit(E->getRHS()); + Ops.Ty = E->getType(); + return Ops; +} + + +// Compound 
assignments. +ComplexPairTy ComplexExprEmitter:: +EmitCompoundAssign(const CompoundAssignOperator *E, + ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&)){ + QualType LHSTy = E->getLHS()->getType(), RHSTy = E->getRHS()->getType(); + + // Load the LHS and RHS operands. + LValue LHSLV = CGF.EmitLValue(E->getLHS()); + + BinOpInfo OpInfo; + OpInfo.Ty = E->getComputationType(); + + // We know the LHS is a complex lvalue. + OpInfo.LHS = EmitLoadOfComplex(LHSLV.getAddress(), false);// FIXME: Volatile. + OpInfo.LHS = EmitComplexToComplexCast(OpInfo.LHS, LHSTy, OpInfo.Ty); + + // It is possible for the RHS to be complex or scalar. + OpInfo.RHS = EmitCast(E->getRHS(), OpInfo.Ty); + + // Expand the binary operator. + ComplexPairTy Result = (this->*Func)(OpInfo); + + // Truncate the result back to the LHS type. + Result = EmitComplexToComplexCast(Result, OpInfo.Ty, LHSTy); + + // Store the result value into the LHS lvalue. + EmitStoreOfComplex(Result, LHSLV.getAddress(), false); // FIXME: VOLATILE + return Result; +} + +ComplexPairTy ComplexExprEmitter::VisitBinAssign(const BinaryOperator *E) { + assert(E->getLHS()->getType().getCanonicalType() == + E->getRHS()->getType().getCanonicalType() && "Invalid assignment"); + // Emit the RHS. + ComplexPairTy Val = Visit(E->getRHS()); + + // Compute the address to store into. + LValue LHS = CGF.EmitLValue(E->getLHS()); + + // Store into it. + // FIXME: Volatility! 
+ EmitStoreOfComplex(Val, LHS.getAddress(), false); + return Val; +} + +ComplexPairTy ComplexExprEmitter::VisitBinComma(const BinaryOperator *E) { + CGF.EmitStmt(E->getLHS()); + return Visit(E->getRHS()); +} + +ComplexPairTy ComplexExprEmitter:: +VisitConditionalOperator(const ConditionalOperator *E) { + llvm::BasicBlock *LHSBlock = new llvm::BasicBlock("cond.?"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("cond.:"); + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("cond.cont"); + + llvm::Value *Cond = CGF.EvaluateExprAsBool(E->getCond()); + Builder.CreateCondBr(Cond, LHSBlock, RHSBlock); + + CGF.EmitBlock(LHSBlock); + + // Handle the GNU extension for missing LHS. + assert(E->getLHS() && "Must have LHS for complex value"); + + ComplexPairTy LHS = Visit(E->getLHS()); + Builder.CreateBr(ContBlock); + LHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(RHSBlock); + + ComplexPairTy RHS = Visit(E->getRHS()); + Builder.CreateBr(ContBlock); + RHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(ContBlock); + + // Create a PHI node for the real part. + llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), "cond.r"); + RealPN->reserveOperandSpace(2); + RealPN->addIncoming(LHS.first, LHSBlock); + RealPN->addIncoming(RHS.first, RHSBlock); + + // Create a PHI node for the imaginary part. + llvm::PHINode *ImagPN = Builder.CreatePHI(LHS.first->getType(), "cond.i"); + ImagPN->reserveOperandSpace(2); + ImagPN->addIncoming(LHS.second, LHSBlock); + ImagPN->addIncoming(RHS.second, RHSBlock); + + return ComplexPairTy(RealPN, ImagPN); +} + +ComplexPairTy ComplexExprEmitter::VisitChooseExpr(ChooseExpr *E) { + // Emit the LHS or RHS as appropriate. + return Visit(E->isConditionTrue(CGF.getContext()) ? 
E->getLHS() :E->getRHS()); +} + +//===----------------------------------------------------------------------===// +// Entry Point into this File +//===----------------------------------------------------------------------===// + +/// EmitComplexExpr - Emit the computation of the specified expression of +/// complex type, ignoring the result. +ComplexPairTy CodeGenFunction::EmitComplexExpr(const Expr *E) { + assert(E && E->getType()->isComplexType() && + "Invalid complex expression to emit"); + + return ComplexExprEmitter(*this).Visit(const_cast<Expr*>(E)); +} + +/// EmitComplexExprIntoAddr - Emit the computation of the specified expression +/// of complex type, storing into the specified Value*. +void CodeGenFunction::EmitComplexExprIntoAddr(const Expr *E, + llvm::Value *DestAddr, + bool DestIsVolatile) { + assert(E && E->getType()->isComplexType() && + "Invalid complex expression to emit"); + ComplexExprEmitter Emitter(*this); + ComplexPairTy Val = Emitter.Visit(const_cast<Expr*>(E)); + Emitter.EmitStoreOfComplex(Val, DestAddr, DestIsVolatile); +} + +/// LoadComplexFromAddr - Load a complex number from the specified address. +ComplexPairTy CodeGenFunction::LoadComplexFromAddr(llvm::Value *SrcAddr, + bool SrcIsVolatile) { + return ComplexExprEmitter(*this).EmitLoadOfComplex(SrcAddr, SrcIsVolatile); +} diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp new file mode 100644 index 00000000000..e2405b88f37 --- /dev/null +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -0,0 +1,627 @@ +//===--- CGExprConstant.cpp - Emit LLVM Code from Constant Expressions ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Constant Expr nodes as LLVM code. 
+// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Support/Compiler.h" +using namespace clang; +using namespace CodeGen; + +namespace { +class VISIBILITY_HIDDEN ConstExprEmitter : + public StmtVisitor<ConstExprEmitter, llvm::Constant*> { + CodeGenModule &CGM; + CodeGenFunction *CGF; +public: + ConstExprEmitter(CodeGenModule &cgm, CodeGenFunction *cgf) + : CGM(cgm), CGF(cgf) { + } + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + llvm::Constant *VisitStmt(Stmt *S) { + CGM.WarnUnsupported(S, "constant expression"); + QualType T = cast<Expr>(S)->getType(); + return llvm::UndefValue::get(CGM.getTypes().ConvertType(T)); + } + + llvm::Constant *VisitParenExpr(ParenExpr *PE) { + return Visit(PE->getSubExpr()); + } + + // Leaves + llvm::Constant *VisitIntegerLiteral(const IntegerLiteral *E) { + return llvm::ConstantInt::get(E->getValue()); + } + llvm::Constant *VisitFloatingLiteral(const FloatingLiteral *E) { + return llvm::ConstantFP::get(ConvertType(E->getType()), E->getValue()); + } + llvm::Constant *VisitCharacterLiteral(const CharacterLiteral *E) { + return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); + } + llvm::Constant *VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) { + return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); + } + + llvm::Constant *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + return Visit(E->getInitializer()); + } + + llvm::Constant *VisitCastExpr(const CastExpr* E) { + llvm::Constant *C = Visit(E->getSubExpr()); + + return EmitConversion(C, E->getSubExpr()->getType(), E->getType()); + } + + llvm::Constant 
*EmitArrayInitialization(InitListExpr *ILE, + const llvm::ArrayType *AType) { + std::vector<llvm::Constant*> Elts; + unsigned NumInitElements = ILE->getNumInits(); + // FIXME: Check for wide strings + if (NumInitElements > 0 && isa<StringLiteral>(ILE->getInit(0)) && + ILE->getType()->getAsArrayType()->getElementType()->isCharType()) + return Visit(ILE->getInit(0)); + const llvm::Type *ElemTy = AType->getElementType(); + unsigned NumElements = AType->getNumElements(); + + // Initialising an array requires us to automatically + // initialise any elements that have not been initialised explicitly + unsigned NumInitableElts = std::min(NumInitElements, NumElements); + + // Copy initializer elements. + unsigned i = 0; + for (; i < NumInitableElts; ++i) { + + llvm::Constant *C = Visit(ILE->getInit(i)); + // FIXME: Remove this when sema of initializers is finished (and the code + // above). + if (C == 0 && ILE->getInit(i)->getType()->isVoidType()) { + if (ILE->getType()->isVoidType()) return 0; + return llvm::UndefValue::get(AType); + } + assert (C && "Failed to create initializer expression"); + Elts.push_back(C); + } + + // Initialize remaining array elements. + for (; i < NumElements; ++i) + Elts.push_back(llvm::Constant::getNullValue(ElemTy)); + + return llvm::ConstantArray::get(AType, Elts); + } + + llvm::Constant *EmitStructInitialization(InitListExpr *ILE, + const llvm::StructType *SType) { + + TagDecl *TD = ILE->getType()->getAsRecordType()->getDecl(); + std::vector<llvm::Constant*> Elts; + const CGRecordLayout *CGR = CGM.getTypes().getCGRecordLayout(TD); + unsigned NumInitElements = ILE->getNumInits(); + unsigned NumElements = SType->getNumElements(); + + // Initialising an structure requires us to automatically + // initialise any elements that have not been initialised explicitly + unsigned NumInitableElts = std::min(NumInitElements, NumElements); + + // Copy initializer elements. Skip padding fields. 
+ unsigned EltNo = 0; // Element no in ILE + unsigned FieldNo = 0; // Field no in SType + while (EltNo < NumInitableElts) { + + // Zero initialize padding field. + if (CGR->isPaddingField(FieldNo)) { + const llvm::Type *FieldTy = SType->getElementType(FieldNo); + Elts.push_back(llvm::Constant::getNullValue(FieldTy)); + FieldNo++; + continue; + } + + llvm::Constant *C = Visit(ILE->getInit(EltNo)); + // FIXME: Remove this when sema of initializers is finished (and the code + // above). + if (C == 0 && ILE->getInit(EltNo)->getType()->isVoidType()) { + if (ILE->getType()->isVoidType()) return 0; + return llvm::UndefValue::get(SType); + } + assert (C && "Failed to create initializer expression"); + Elts.push_back(C); + EltNo++; + FieldNo++; + } + + // Initialize remaining structure elements. + for (unsigned i = Elts.size(); i < NumElements; ++i) { + const llvm::Type *FieldTy = SType->getElementType(i); + Elts.push_back(llvm::Constant::getNullValue(FieldTy)); + } + + return llvm::ConstantStruct::get(SType, Elts); + } + + llvm::Constant *EmitVectorInitialization(InitListExpr *ILE, + const llvm::VectorType *VType) { + + std::vector<llvm::Constant*> Elts; + unsigned NumInitElements = ILE->getNumInits(); + unsigned NumElements = VType->getNumElements(); + + assert (NumInitElements == NumElements + && "Unsufficient vector init elelments"); + // Copy initializer elements. + unsigned i = 0; + for (; i < NumElements; ++i) { + + llvm::Constant *C = Visit(ILE->getInit(i)); + // FIXME: Remove this when sema of initializers is finished (and the code + // above). 
+ if (C == 0 && ILE->getInit(i)->getType()->isVoidType()) { + if (ILE->getType()->isVoidType()) return 0; + return llvm::UndefValue::get(VType); + } + assert (C && "Failed to create initializer expression"); + Elts.push_back(C); + } + + return llvm::ConstantVector::get(VType, Elts); + } + + llvm::Constant *VisitInitListExpr(InitListExpr *ILE) { + const llvm::CompositeType *CType = + dyn_cast<llvm::CompositeType>(ConvertType(ILE->getType())); + + if (!CType) { + // We have a scalar in braces. Just use the first element. + return Visit(ILE->getInit(0)); + } + + if (const llvm::ArrayType *AType = dyn_cast<llvm::ArrayType>(CType)) + return EmitArrayInitialization(ILE, AType); + + if (const llvm::StructType *SType = dyn_cast<llvm::StructType>(CType)) + return EmitStructInitialization(ILE, SType); + + if (const llvm::VectorType *VType = dyn_cast<llvm::VectorType>(CType)) + return EmitVectorInitialization(ILE, VType); + + // Make sure we have an array at this point + assert(0 && "Unable to handle InitListExpr"); + // Get rid of control reaches end of void function warning. + // Not reached. 
+ return 0; + } + + llvm::Constant *VisitImplicitCastExpr(ImplicitCastExpr *ICExpr) { + Expr* SExpr = ICExpr->getSubExpr(); + QualType SType = SExpr->getType(); + llvm::Constant *C; // the intermediate expression + QualType T; // the type of the intermediate expression + if (SType->isArrayType()) { + // Arrays decay to a pointer to the first element + // VLAs would require special handling, but they can't occur here + C = EmitLValue(SExpr); + // A GEP with indices {0, 0} turns the array address into the address of + // its first element. + llvm::Constant *Idx0 = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *Ops[] = {Idx0, Idx0}; + C = llvm::ConstantExpr::getGetElementPtr(C, Ops, 2); + + QualType ElemType = SType->getAsArrayType()->getElementType(); + T = CGM.getContext().getPointerType(ElemType); + } else if (SType->isFunctionType()) { + // Function types decay to a pointer to the function + C = EmitLValue(SExpr); + T = CGM.getContext().getPointerType(SType); + } else { + // Anything else is evaluated as an ordinary constant value. + C = Visit(SExpr); + T = SType; + } + + // Perform the conversion; note that an implicit cast can both promote + // and convert an array/function + return EmitConversion(C, T, ICExpr->getType()); + } + + llvm::Constant *VisitStringLiteral(StringLiteral *E) { + const char *StrData = E->getStrData(); + unsigned Len = E->getByteLength(); + assert(!E->getType()->isPointerType() && "Strings are always arrays"); + + // This must be a string initializing an array in a static initializer. + // Don't emit it as the address of the string, emit the string + // data itself as an inline array. + const ConstantArrayType *CAT = E->getType()->getAsConstantArrayType(); + assert(CAT && "String isn't pointer or array!"); + + std::string Str(StrData, StrData + Len); + // Null terminate the string before potentially truncating it. + // FIXME: What about wchar_t strings? + Str.push_back(0); + + uint64_t RealLen = CAT->getSize().getZExtValue(); + // Shrink or grow the initializer to the required size.
+ if (RealLen != Str.size()) + Str.resize(RealLen); + + return llvm::ConstantArray::get(Str, false); + } + + llvm::Constant *VisitDeclRefExpr(DeclRefExpr *E) { + const ValueDecl *Decl = E->getDecl(); + if (const EnumConstantDecl *EC = dyn_cast<EnumConstantDecl>(Decl)) + return llvm::ConstantInt::get(EC->getInitVal()); + assert(0 && "Unsupported decl ref type!"); + return 0; + } + + llvm::Constant *VisitSizeOfAlignOfTypeExpr(const SizeOfAlignOfTypeExpr *E) { + return EmitSizeAlignOf(E->getArgumentType(), E->getType(), E->isSizeOf()); + } + + // Unary operators + llvm::Constant *VisitUnaryPlus(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + llvm::Constant *VisitUnaryMinus(const UnaryOperator *E) { + return llvm::ConstantExpr::getNeg(Visit(E->getSubExpr())); + } + llvm::Constant *VisitUnaryNot(const UnaryOperator *E) { + return llvm::ConstantExpr::getNot(Visit(E->getSubExpr())); + } + /// VisitUnaryLNot - Fold logical not: compare the operand against zero (or + /// a null pointer) for equality, then zero-extend the i1 result to the + /// type of the expression. + llvm::Constant *VisitUnaryLNot(const UnaryOperator *E) { + llvm::Constant *SubExpr = Visit(E->getSubExpr()); + + if (E->getSubExpr()->getType()->isRealFloatingType()) { + // Compare against 0.0 for fp scalars. Use an ordered compare so that + // !NaN folds to 0: this is the exact negation of the FCMP_UNE test that + // EmitConversionToBool uses for fp -> bool. + llvm::Constant *Zero = llvm::Constant::getNullValue(SubExpr->getType()); + SubExpr = llvm::ConstantExpr::getFCmp(llvm::FCmpInst::FCMP_OEQ, SubExpr, + Zero); + } else { + assert((E->getSubExpr()->getType()->isIntegerType() || + E->getSubExpr()->getType()->isPointerType()) && + "Unknown scalar type to convert"); + // Compare against an integer or pointer null.
+ llvm::Constant *Zero = llvm::Constant::getNullValue(SubExpr->getType()); + SubExpr = llvm::ConstantExpr::getICmp(llvm::ICmpInst::ICMP_EQ, SubExpr, + Zero); + } + + return llvm::ConstantExpr::getZExt(SubExpr, ConvertType(E->getType())); + } + llvm::Constant *VisitUnarySizeOf(const UnaryOperator *E) { + return EmitSizeAlignOf(E->getSubExpr()->getType(), E->getType(), true); + } + llvm::Constant *VisitUnaryAlignOf(const UnaryOperator *E) { + return EmitSizeAlignOf(E->getSubExpr()->getType(), E->getType(), false); + } + llvm::Constant *VisitUnaryAddrOf(const UnaryOperator *E) { + return EmitLValue(E->getSubExpr()); + } + llvm::Constant *VisitUnaryOffsetOf(const UnaryOperator *E) { + int64_t Val = E->evaluateOffsetOf(CGM.getContext()); + + assert(E->getType()->isIntegerType() && "Result type must be an integer!"); + + uint32_t ResultWidth = + static_cast<uint32_t>(CGM.getContext().getTypeSize(E->getType())); + return llvm::ConstantInt::get(llvm::APInt(ResultWidth, Val)); + } + + // Binary operators + llvm::Constant *VisitBinOr(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + return llvm::ConstantExpr::getOr(LHS, RHS); + } + llvm::Constant *VisitBinSub(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + if (!isa<llvm::PointerType>(RHS->getType())) { + // pointer - int + if (isa<llvm::PointerType>(LHS->getType())) { + llvm::Constant *Idx = llvm::ConstantExpr::getNeg(RHS); + + return llvm::ConstantExpr::getGetElementPtr(LHS, &Idx, 1); + } + + // int - int + return llvm::ConstantExpr::getSub(LHS, RHS); + } + + assert(0 && "Unhandled bin sub case!"); + return 0; + } + + llvm::Constant *VisitBinShl(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + // LLVM requires the LHS and RHS to be the same type: promote or truncate the + // RHS to the same size as the LHS. 
+ if (LHS->getType() != RHS->getType()) + RHS = llvm::ConstantExpr::getIntegerCast(RHS, LHS->getType(), false); + + return llvm::ConstantExpr::getShl(LHS, RHS); + } + + llvm::Constant *VisitBinMul(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + return llvm::ConstantExpr::getMul(LHS, RHS); + } + + llvm::Constant *VisitBinDiv(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + if (LHS->getType()->isFPOrFPVector()) + return llvm::ConstantExpr::getFDiv(LHS, RHS); + else if (E->getType()->isUnsignedIntegerType()) + return llvm::ConstantExpr::getUDiv(LHS, RHS); + else + return llvm::ConstantExpr::getSDiv(LHS, RHS); + } + + llvm::Constant *VisitBinAdd(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + if (!E->getType()->isPointerType()) + return llvm::ConstantExpr::getAdd(LHS, RHS); + + llvm::Constant *Ptr, *Idx; + if (isa<llvm::PointerType>(LHS->getType())) { // pointer + int + Ptr = LHS; + Idx = RHS; + } else { // int + pointer + Ptr = RHS; + Idx = LHS; + } + + return llvm::ConstantExpr::getGetElementPtr(Ptr, &Idx, 1); + } + + llvm::Constant *VisitBinAnd(const BinaryOperator *E) { + llvm::Constant *LHS = Visit(E->getLHS()); + llvm::Constant *RHS = Visit(E->getRHS()); + + return llvm::ConstantExpr::getAnd(LHS, RHS); + } + + // Utility methods + const llvm::Type *ConvertType(QualType T) { + return CGM.getTypes().ConvertType(T); + } + + llvm::Constant *EmitConversionToBool(llvm::Constant *Src, QualType SrcType) { + assert(SrcType->isCanonical() && "EmitConversion strips typedefs"); + + if (SrcType->isRealFloatingType()) { + // Compare against 0.0 for fp scalars. 
+ llvm::Constant *Zero = llvm::Constant::getNullValue(Src->getType()); + return llvm::ConstantExpr::getFCmp(llvm::FCmpInst::FCMP_UNE, Src, Zero); + } + + assert((SrcType->isIntegerType() || SrcType->isPointerType()) && + "Unknown scalar type to convert"); + + // Compare against an integer or pointer null. + llvm::Constant *Zero = llvm::Constant::getNullValue(Src->getType()); + return llvm::ConstantExpr::getICmp(llvm::ICmpInst::ICMP_NE, Src, Zero); + } + + llvm::Constant *EmitConversion(llvm::Constant *Src, QualType SrcType, + QualType DstType) { + SrcType = SrcType.getCanonicalType(); + DstType = DstType.getCanonicalType(); + if (SrcType == DstType) return Src; + + // Handle conversions to bool first, they are special: comparisons against 0. + if (DstType->isBooleanType()) + return EmitConversionToBool(Src, SrcType); + + const llvm::Type *DstTy = ConvertType(DstType); + + // Ignore conversions like int -> uint. + if (Src->getType() == DstTy) + return Src; + + // Handle pointer conversions next: pointers can only be converted to/from + // other pointers and integers. + if (isa<PointerType>(DstType)) { + // The source value may be an integer, or a pointer. + if (isa<llvm::PointerType>(Src->getType())) + return llvm::ConstantExpr::getBitCast(Src, DstTy); + assert(SrcType->isIntegerType() &&"Not ptr->ptr or int->ptr conversion?"); + return llvm::ConstantExpr::getIntToPtr(Src, DstTy); + } + + if (isa<PointerType>(SrcType)) { + // Must be an ptr to int cast. 
+ assert(isa<llvm::IntegerType>(DstTy) && "not ptr->int?"); + return llvm::ConstantExpr::getPtrToInt(Src, DstTy); + } + + // A scalar source can be splatted to a vector of the same element type + if (isa<llvm::VectorType>(DstTy) && !isa<VectorType>(SrcType)) { + const llvm::VectorType *VT = cast<llvm::VectorType>(DstTy); + assert((VT->getElementType() == Src->getType()) && + "Vector element type must match scalar type to splat."); + unsigned NumElements = DstType->getAsVectorType()->getNumElements(); + llvm::SmallVector<llvm::Constant*, 16> Elements; + for (unsigned i = 0; i < NumElements; i++) + Elements.push_back(Src); + + return llvm::ConstantVector::get(&Elements[0], NumElements); + } + + if (isa<llvm::VectorType>(Src->getType()) || + isa<llvm::VectorType>(DstTy)) { + return llvm::ConstantExpr::getBitCast(Src, DstTy); + } + + // Finally, we have the arithmetic types: real int/float. + if (isa<llvm::IntegerType>(Src->getType())) { + bool InputSigned = SrcType->isSignedIntegerType(); + if (isa<llvm::IntegerType>(DstTy)) + return llvm::ConstantExpr::getIntegerCast(Src, DstTy, InputSigned); + else if (InputSigned) + return llvm::ConstantExpr::getSIToFP(Src, DstTy); + else + return llvm::ConstantExpr::getUIToFP(Src, DstTy); + } + + assert(Src->getType()->isFloatingPoint() && "Unknown real conversion"); + if (isa<llvm::IntegerType>(DstTy)) { + if (DstType->isSignedIntegerType()) + return llvm::ConstantExpr::getFPToSI(Src, DstTy); + else + return llvm::ConstantExpr::getFPToUI(Src, DstTy); + } + + assert(DstTy->isFloatingPoint() && "Unknown real conversion"); + if (DstTy->getTypeID() < Src->getType()->getTypeID()) + return llvm::ConstantExpr::getFPTrunc(Src, DstTy); + else + return llvm::ConstantExpr::getFPExtend(Src, DstTy); + } + + llvm::Constant *EmitSizeAlignOf(QualType TypeToSize, + QualType RetType, bool isSizeOf) { + std::pair<uint64_t, unsigned> Info = + CGM.getContext().getTypeInfo(TypeToSize); + + uint64_t Val = isSizeOf ? 
Info.first : Info.second; + Val /= 8; // Return size in bytes, not bits. + + assert(RetType->isIntegerType() && "Result type must be an integer!"); + + uint32_t ResultWidth = + static_cast<uint32_t>(CGM.getContext().getTypeSize(RetType)); + return llvm::ConstantInt::get(llvm::APInt(ResultWidth, Val)); + } + + llvm::Constant *EmitLValue(Expr *E) { + switch (E->getStmtClass()) { + default: break; + case Expr::ParenExprClass: + // Elide parenthesis + return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); + case Expr::CompoundLiteralExprClass: { + // Note that due to the nature of compound literals, this is guaranteed + // to be the only use of the variable, so we just generate it here. + CompoundLiteralExpr *CLE = cast<CompoundLiteralExpr>(E); + llvm::Constant* C = Visit(CLE->getInitializer()); + C = new llvm::GlobalVariable(C->getType(),E->getType().isConstQualified(), + llvm::GlobalValue::InternalLinkage, + C, ".compoundliteral", &CGM.getModule()); + return C; + } + case Expr::DeclRefExprClass: { + ValueDecl *Decl = cast<DeclRefExpr>(E)->getDecl(); + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Decl)) + return CGM.GetAddrOfFunctionDecl(FD, false); + if (const FileVarDecl* VD = dyn_cast<FileVarDecl>(Decl)) + return CGM.GetAddrOfGlobalVar(VD, false); + if (const BlockVarDecl* BVD = dyn_cast<BlockVarDecl>(Decl)) { + assert(CGF && "Can't access static local vars without CGF"); + return CGF->GetAddrOfStaticLocalVar(BVD); + } + break; + } + case Expr::MemberExprClass: { + MemberExpr* ME = cast<MemberExpr>(E); + llvm::Constant *Base; + if (ME->isArrow()) + Base = Visit(ME->getBase()); + else + Base = EmitLValue(ME->getBase()); + + unsigned FieldNumber = CGM.getTypes().getLLVMFieldNo(ME->getMemberDecl()); + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, + FieldNumber); + llvm::Value *Ops[] = {Zero, Idx}; + return llvm::ConstantExpr::getGetElementPtr(Base, Ops, 2); + } + case 
Expr::ArraySubscriptExprClass: { + ArraySubscriptExpr* ASExpr = cast<ArraySubscriptExpr>(E); + llvm::Constant *Base = Visit(ASExpr->getBase()); + llvm::Constant *Index = Visit(ASExpr->getIdx()); + assert(!ASExpr->getBase()->getType()->isVectorType() && + "Taking the address of a vector component is illegal!"); + return llvm::ConstantExpr::getGetElementPtr(Base, &Index, 1); + } + case Expr::StringLiteralClass: { + StringLiteral *String = cast<StringLiteral>(E); + assert(!String->isWide() && "Cannot codegen wide strings yet"); + const char *StrData = String->getStrData(); + unsigned Len = String->getByteLength(); + + return CGM.GetAddrOfConstantString(std::string(StrData, StrData + Len)); + } + case Expr::UnaryOperatorClass: { + UnaryOperator *Exp = cast<UnaryOperator>(E); + switch (Exp->getOpcode()) { + default: break; + case UnaryOperator::Extension: + // Extension is just a wrapper for expressions + return EmitLValue(Exp->getSubExpr()); + case UnaryOperator::Real: + case UnaryOperator::Imag: { + // The address of __real or __imag is just a GEP off the address + // of the internal expression + llvm::Constant* C = EmitLValue(Exp->getSubExpr()); + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, + Exp->getOpcode() == UnaryOperator::Imag); + llvm::Value *Ops[] = {Zero, Idx}; + return llvm::ConstantExpr::getGetElementPtr(C, Ops, 2); + } + case UnaryOperator::Deref: + // The address of a deref is just the value of the expression + return Visit(Exp->getSubExpr()); + } + break; + } + } + CGM.WarnUnsupported(E, "constant l-value expression"); + llvm::Type *Ty = llvm::PointerType::getUnqual(ConvertType(E->getType())); + return llvm::UndefValue::get(Ty); + } + +}; + +} // end anonymous namespace. 
+ + +llvm::Constant *CodeGenModule::EmitConstantExpr(const Expr *E, + CodeGenFunction *CGF) +{ + QualType type = E->getType().getCanonicalType(); + + if (type->isIntegerType()) { + llvm::APSInt Value(static_cast<uint32_t>(Context.getTypeSize(type))); + if (E->isIntegerConstantExpr(Value, Context)) { + return llvm::ConstantInt::get(Value); + } + } + + return ConstExprEmitter(*this, CGF).Visit(const_cast<Expr*>(E)); +} diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp new file mode 100644 index 00000000000..892712a0d4c --- /dev/null +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -0,0 +1,1185 @@ +//===--- CGExprScalar.cpp - Emit LLVM Code for Scalar Exprs ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes with scalar LLVM types as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/Compiler.h" +#include <cstdarg> + +using namespace clang; +using namespace CodeGen; +using llvm::Value; + +//===----------------------------------------------------------------------===// +// Scalar Expression Emitter +//===----------------------------------------------------------------------===// + +struct BinOpInfo { + Value *LHS; + Value *RHS; + QualType Ty; // Computation Type. 
+ const BinaryOperator *E; +}; + +namespace { +class VISIBILITY_HIDDEN ScalarExprEmitter + : public StmtVisitor<ScalarExprEmitter, Value*> { + CodeGenFunction &CGF; + llvm::LLVMFoldingBuilder &Builder; + CGObjCRuntime *Runtime; + + +public: + + ScalarExprEmitter(CodeGenFunction &cgf) : CGF(cgf), + Builder(CGF.Builder), + Runtime(CGF.CGM.getObjCRuntime()) { + } + + + //===--------------------------------------------------------------------===// + // Utilities + //===--------------------------------------------------------------------===// + + const llvm::Type *ConvertType(QualType T) { return CGF.ConvertType(T); } + LValue EmitLValue(const Expr *E) { return CGF.EmitLValue(E); } + + Value *EmitLoadOfLValue(LValue LV, QualType T) { + return CGF.EmitLoadOfLValue(LV, T).getScalarVal(); + } + + /// EmitLoadOfLValue - Given an expression with complex type that represents a + /// value l-value, this method emits the address of the l-value, then loads + /// and returns the result. + Value *EmitLoadOfLValue(const Expr *E) { + // FIXME: Volatile + return EmitLoadOfLValue(EmitLValue(E), E->getType()); + } + + /// EmitConversionToBool - Convert the specified expression value to a + /// boolean (i1) truth value. This is equivalent to "Val != 0". + Value *EmitConversionToBool(Value *Src, QualType DstTy); + + /// EmitScalarConversion - Emit a conversion from the specified type to the + /// specified destination type, both of which are LLVM scalar types. + Value *EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy); + + /// EmitComplexToScalarConversion - Emit a conversion from the specified + /// complex type to the specified destination type, where the destination + /// type is an LLVM scalar type. 
+ Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, + QualType SrcTy, QualType DstTy); + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + Value *VisitStmt(Stmt *S) { + S->dump(CGF.getContext().getSourceManager()); + assert(0 && "Stmt can't have complex result type!"); + return 0; + } + Value *VisitExpr(Expr *S); + Value *VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr()); } + + // Leaves. + Value *VisitIntegerLiteral(const IntegerLiteral *E) { + return llvm::ConstantInt::get(E->getValue()); + } + Value *VisitFloatingLiteral(const FloatingLiteral *E) { + return llvm::ConstantFP::get(ConvertType(E->getType()), E->getValue()); + } + Value *VisitCharacterLiteral(const CharacterLiteral *E) { + return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); + } + Value *VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) { + return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); + } + Value *VisitTypesCompatibleExpr(const TypesCompatibleExpr *E) { + return llvm::ConstantInt::get(ConvertType(E->getType()), + CGF.getContext().typesAreCompatible( + E->getArgType1(), E->getArgType2())); + } + Value *VisitSizeOfAlignOfTypeExpr(const SizeOfAlignOfTypeExpr *E) { + return EmitSizeAlignOf(E->getArgumentType(), E->getType(), E->isSizeOf()); + } + + // l-values. 
+ Value *VisitDeclRefExpr(DeclRefExpr *E) { + if (const EnumConstantDecl *EC = dyn_cast<EnumConstantDecl>(E->getDecl())) + return llvm::ConstantInt::get(EC->getInitVal()); + return EmitLoadOfLValue(E); + } + Value *VisitObjCMessageExpr(ObjCMessageExpr *E); + Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E); + Value *VisitMemberExpr(Expr *E) { return EmitLoadOfLValue(E); } + Value *VisitOCUVectorElementExpr(Expr *E) { return EmitLoadOfLValue(E); } + Value *VisitStringLiteral(Expr *E) { return EmitLValue(E).getAddress(); } + Value *VisitPreDefinedExpr(Expr *E) { return EmitLValue(E).getAddress(); } + + Value *VisitInitListExpr(InitListExpr *E) { + unsigned NumInitElements = E->getNumInits(); + + const llvm::VectorType *VType = + dyn_cast<llvm::VectorType>(ConvertType(E->getType())); + + // We have a scalar in braces. Just use the first element. + if (!VType) + return Visit(E->getInit(0)); + + unsigned NumVectorElements = VType->getNumElements(); + const llvm::Type *ElementType = VType->getElementType(); + + // Emit individual vector element stores. 
+ llvm::Value *V = llvm::UndefValue::get(VType); + + // Emit initializers + unsigned i; + for (i = 0; i < NumInitElements; ++i) { + Value *NewV = Visit(E->getInit(i)); + Value *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + V = Builder.CreateInsertElement(V, NewV, Idx); + } + + // Emit remaining default initializers + for (/* Do not initialize i*/; i < NumVectorElements; ++i) { + Value *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + llvm::Value *NewV = llvm::Constant::getNullValue(ElementType); + V = Builder.CreateInsertElement(V, NewV, Idx); + } + + return V; + } + + Value *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + return Visit(E->getInitializer()); + } + + Value *VisitImplicitCastExpr(const ImplicitCastExpr *E); + Value *VisitCastExpr(const CastExpr *E) { + return EmitCastExpr(E->getSubExpr(), E->getType()); + } + Value *EmitCastExpr(const Expr *E, QualType T); + + Value *VisitCallExpr(const CallExpr *E) { + return CGF.EmitCallExpr(E).getScalarVal(); + } + + Value *VisitStmtExpr(const StmtExpr *E); + + // Unary Operators. 
+ Value *VisitPrePostIncDec(const UnaryOperator *E, bool isInc, bool isPre); + Value *VisitUnaryPostDec(const UnaryOperator *E) { + return VisitPrePostIncDec(E, false, false); + } + Value *VisitUnaryPostInc(const UnaryOperator *E) { + return VisitPrePostIncDec(E, true, false); + } + Value *VisitUnaryPreDec(const UnaryOperator *E) { + return VisitPrePostIncDec(E, false, true); + } + Value *VisitUnaryPreInc(const UnaryOperator *E) { + return VisitPrePostIncDec(E, true, true); + } + Value *VisitUnaryAddrOf(const UnaryOperator *E) { + return EmitLValue(E->getSubExpr()).getAddress(); + } + Value *VisitUnaryDeref(const Expr *E) { return EmitLoadOfLValue(E); } + Value *VisitUnaryPlus(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + Value *VisitUnaryMinus (const UnaryOperator *E); + Value *VisitUnaryNot (const UnaryOperator *E); + Value *VisitUnaryLNot (const UnaryOperator *E); + Value *VisitUnarySizeOf (const UnaryOperator *E) { + return EmitSizeAlignOf(E->getSubExpr()->getType(), E->getType(), true); + } + Value *VisitUnaryAlignOf (const UnaryOperator *E) { + return EmitSizeAlignOf(E->getSubExpr()->getType(), E->getType(), false); + } + Value *EmitSizeAlignOf(QualType TypeToSize, QualType RetType, + bool isSizeOf); + Value *VisitUnaryReal (const UnaryOperator *E); + Value *VisitUnaryImag (const UnaryOperator *E); + Value *VisitUnaryExtension(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + Value *VisitUnaryOffsetOf(const UnaryOperator *E); + + // Binary Operators. 
+ Value *EmitMul(const BinOpInfo &Ops) { + return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); + } + Value *EmitDiv(const BinOpInfo &Ops); + Value *EmitRem(const BinOpInfo &Ops); + Value *EmitAdd(const BinOpInfo &Ops); + Value *EmitSub(const BinOpInfo &Ops); + Value *EmitShl(const BinOpInfo &Ops); + Value *EmitShr(const BinOpInfo &Ops); + Value *EmitAnd(const BinOpInfo &Ops) { + return Builder.CreateAnd(Ops.LHS, Ops.RHS, "and"); + } + Value *EmitXor(const BinOpInfo &Ops) { + return Builder.CreateXor(Ops.LHS, Ops.RHS, "xor"); + } + Value *EmitOr (const BinOpInfo &Ops) { + return Builder.CreateOr(Ops.LHS, Ops.RHS, "or"); + } + + BinOpInfo EmitBinOps(const BinaryOperator *E); + Value *EmitCompoundAssign(const CompoundAssignOperator *E, + Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); + + // Binary operators and binary compound assignment operators. +#define HANDLEBINOP(OP) \ + Value *VisitBin ## OP(const BinaryOperator *E) { \ + return Emit ## OP(EmitBinOps(E)); \ + } \ + Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \ + return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \ + } + HANDLEBINOP(Mul); + HANDLEBINOP(Div); + HANDLEBINOP(Rem); + HANDLEBINOP(Add); + // (Sub) - Sub is handled specially below for ptr-ptr subtract. + HANDLEBINOP(Shl); + HANDLEBINOP(Shr); + HANDLEBINOP(And); + HANDLEBINOP(Xor); + HANDLEBINOP(Or); +#undef HANDLEBINOP + Value *VisitBinSub(const BinaryOperator *E); + Value *VisitBinSubAssign(const CompoundAssignOperator *E) { + return EmitCompoundAssign(E, &ScalarExprEmitter::EmitSub); + } + + // Comparisons. 
+ Value *EmitCompare(const BinaryOperator *E, unsigned UICmpOpc, + unsigned SICmpOpc, unsigned FCmpOpc); +#define VISITCOMP(CODE, UI, SI, FP) \ + Value *VisitBin##CODE(const BinaryOperator *E) { \ + return EmitCompare(E, llvm::ICmpInst::UI, llvm::ICmpInst::SI, \ + llvm::FCmpInst::FP); } + VISITCOMP(LT, ICMP_ULT, ICMP_SLT, FCMP_OLT); + VISITCOMP(GT, ICMP_UGT, ICMP_SGT, FCMP_OGT); + VISITCOMP(LE, ICMP_ULE, ICMP_SLE, FCMP_OLE); + VISITCOMP(GE, ICMP_UGE, ICMP_SGE, FCMP_OGE); + VISITCOMP(EQ, ICMP_EQ , ICMP_EQ , FCMP_OEQ); + VISITCOMP(NE, ICMP_NE , ICMP_NE , FCMP_UNE); +#undef VISITCOMP + + Value *VisitBinAssign (const BinaryOperator *E); + + Value *VisitBinLAnd (const BinaryOperator *E); + Value *VisitBinLOr (const BinaryOperator *E); + Value *VisitBinComma (const BinaryOperator *E); + + // Other Operators. + Value *VisitConditionalOperator(const ConditionalOperator *CO); + Value *VisitChooseExpr(ChooseExpr *CE); + Value *VisitOverloadExpr(OverloadExpr *OE); + Value *VisitVAArgExpr(VAArgExpr *VE); + Value *VisitObjCStringLiteral(const ObjCStringLiteral *E) { + return CGF.EmitObjCStringLiteral(E); + } + Value *VisitObjCEncodeExpr(const ObjCEncodeExpr *E); +}; +} // end anonymous namespace. + +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + +/// EmitConversionToBool - Convert the specified expression value to a +/// boolean (i1) truth value. This is equivalent to "Val != 0". Floating point +/// sources are compared against zero with FCmpUNE, integer and pointer sources +/// with ICmpNE. SrcType must already be canonical. +Value *ScalarExprEmitter::EmitConversionToBool(Value *Src, QualType SrcType) { + assert(SrcType->isCanonical() && "EmitScalarConversion strips typedefs"); + + if (SrcType->isRealFloatingType()) { + // Compare against 0.0 for fp scalars.
+ llvm::Value *Zero = llvm::Constant::getNullValue(Src->getType()); + return Builder.CreateFCmpUNE(Src, Zero, "tobool"); + } + + assert((SrcType->isIntegerType() || SrcType->isPointerType()) && + "Unknown scalar type to convert"); + + // Because of the type rules of C, we often end up computing a logical value, + // then zero extending it to int, then wanting it as a logical value again. + // Optimize this common case. + if (llvm::ZExtInst *ZI = dyn_cast<llvm::ZExtInst>(Src)) { + if (ZI->getOperand(0)->getType() == llvm::Type::Int1Ty) { + Value *Result = ZI->getOperand(0); + // If there aren't any more uses, zap the instruction to save space. + // Note that there can be more uses, for example if this + // is the result of an assignment. + if (ZI->use_empty()) + ZI->eraseFromParent(); + return Result; + } + } + + // Compare against an integer or pointer null. + llvm::Value *Zero = llvm::Constant::getNullValue(Src->getType()); + return Builder.CreateICmpNE(Src, Zero, "tobool"); +} + +/// EmitScalarConversion - Emit a conversion from the specified type to the +/// specified destination type, both of which are LLVM scalar types. +Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, + QualType DstType) { + SrcType = SrcType.getCanonicalType(); + DstType = DstType.getCanonicalType(); + if (SrcType == DstType) return Src; + + if (DstType->isVoidType()) return 0; + + // Handle conversions to bool first, they are special: comparisons against 0. + if (DstType->isBooleanType()) + return EmitConversionToBool(Src, SrcType); + + const llvm::Type *DstTy = ConvertType(DstType); + + // Ignore conversions like int -> uint. + if (Src->getType() == DstTy) + return Src; + + // Handle pointer conversions next: pointers can only be converted to/from + // other pointers and integers. + if (isa<PointerType>(DstType)) { + // The source value may be an integer, or a pointer.
+ if (isa<llvm::PointerType>(Src->getType())) + return Builder.CreateBitCast(Src, DstTy, "conv"); + assert(SrcType->isIntegerType() && "Not ptr->ptr or int->ptr conversion?"); + return Builder.CreateIntToPtr(Src, DstTy, "conv"); + } + + if (isa<PointerType>(SrcType)) { + // Must be a ptr to int cast. + assert(isa<llvm::IntegerType>(DstTy) && "not ptr->int?"); + return Builder.CreatePtrToInt(Src, DstTy, "conv"); + } + + // A scalar source can be splatted to an OCU vector of the same element type + if (DstType->isOCUVectorType() && !isa<VectorType>(SrcType) && + cast<llvm::VectorType>(DstTy)->getElementType() == Src->getType()) + return CGF.EmitVector(&Src, DstType->getAsVectorType()->getNumElements(), + true); + + // Allow bitcast from vector to integer/fp of the same size. + if (isa<llvm::VectorType>(Src->getType()) || + isa<llvm::VectorType>(DstTy)) + return Builder.CreateBitCast(Src, DstTy, "conv"); + + // Finally, we have the arithmetic types: real int/float. + if (isa<llvm::IntegerType>(Src->getType())) { + bool InputSigned = SrcType->isSignedIntegerType(); + if (isa<llvm::IntegerType>(DstTy)) + return Builder.CreateIntCast(Src, DstTy, InputSigned, "conv"); + else if (InputSigned) + return Builder.CreateSIToFP(Src, DstTy, "conv"); + else + return Builder.CreateUIToFP(Src, DstTy, "conv"); + } + + assert(Src->getType()->isFloatingPoint() && "Unknown real conversion"); + if (isa<llvm::IntegerType>(DstTy)) { + if (DstType->isSignedIntegerType()) + return Builder.CreateFPToSI(Src, DstTy, "conv"); + else + return Builder.CreateFPToUI(Src, DstTy, "conv"); + } + + assert(DstTy->isFloatingPoint() && "Unknown real conversion"); + // NOTE(review): trunc vs. ext is chosen by comparing llvm TypeID values, which + // presumes the fp TypeIDs are ordered by increasing width - confirm. + if (DstTy->getTypeID() < Src->getType()->getTypeID()) + return Builder.CreateFPTrunc(Src, DstTy, "conv"); + else + return Builder.CreateFPExt(Src, DstTy, "conv"); +} + +/// EmitComplexToScalarConversion - Emit a conversion from the specified +/// complex type to the specified destination type, where the destination +/// type is an LLVM scalar
type. +Value *ScalarExprEmitter:: +EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, + QualType SrcTy, QualType DstTy) { + // Get the source element type. + SrcTy = cast<ComplexType>(SrcTy.getCanonicalType())->getElementType(); + + // Handle conversions to bool first, they are special: comparisons against 0. + if (DstTy->isBooleanType()) { + // Complex != 0 -> (Real != 0) | (Imag != 0) + Src.first = EmitScalarConversion(Src.first, SrcTy, DstTy); + Src.second = EmitScalarConversion(Src.second, SrcTy, DstTy); + return Builder.CreateOr(Src.first, Src.second, "tobool"); + } + + // C99 6.3.1.7p2: "When a value of complex type is converted to a real type, + // the imaginary part of the complex value is discarded and the value of the + // real part is converted according to the conversion rules for the + // corresponding real type." + return EmitScalarConversion(Src.first, SrcTy, DstTy); +} + + +//===----------------------------------------------------------------------===// +// Visitor Methods +//===----------------------------------------------------------------------===// + +/// VisitExpr - Fallback for expressions with no dedicated visitor: warn that +/// the expression is unsupported and return undef of the converted type (or +/// null for void). +Value *ScalarExprEmitter::VisitExpr(Expr *E) { + CGF.WarnUnsupported(E, "scalar expression"); + if (E->getType()->isVoidType()) + return 0; + return llvm::UndefValue::get(CGF.ConvertType(E->getType())); +} + +/// VisitObjCMessageExpr - Emit the receiver and the arguments of a message +/// send, then delegate the dispatch itself to the Objective-C runtime object. +Value *ScalarExprEmitter::VisitObjCMessageExpr(ObjCMessageExpr *E) { + // Only the lookup mechanism and first two arguments of the method + // implementation vary between runtimes. We can get the receiver and + // arguments in generic code. + + // Find the receiver + llvm::Value * Receiver = CGF.EmitScalarExpr(E->getReceiver()); + + // Process the arguments + unsigned int ArgC = E->getNumArgs(); + llvm::SmallVector<llvm::Value*, 16> Args; + for(unsigned i=0 ; i<ArgC ; i++) { + Expr *ArgExpr = E->getArg(i); + QualType ArgTy = ArgExpr->getType(); + if (!CGF.hasAggregateLLVMType(ArgTy)) { + // Scalar argument is passed by-value.
+ Args.push_back(CGF.EmitScalarExpr(ArgExpr)); + } else if (ArgTy->isComplexType()) { + // Make a temporary alloca to pass the argument. + llvm::Value *DestMem = CGF.CreateTempAlloca(ConvertType(ArgTy)); + CGF.EmitComplexExprIntoAddr(ArgExpr, DestMem, false); + Args.push_back(DestMem); + } else { + // Any other aggregate is likewise passed through a temporary alloca. + llvm::Value *DestMem = CGF.CreateTempAlloca(ConvertType(ArgTy)); + CGF.EmitAggExpr(ArgExpr, DestMem, false); + Args.push_back(DestMem); + } + } + + // Get the selector string + std::string SelStr = E->getSelector().getName(); + llvm::Constant *Selector = CGF.CGM.GetAddrOfConstantString(SelStr); + // The return type is converted once, directly in the call below; the former + // stand-alone ConvertType call discarded its result. + return Runtime->generateMessageSend(Builder, + ConvertType(E->getType()), + Receiver, + Selector, + &Args[0], + Args.size()); +} + +Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + // Emit subscript expressions in rvalue contexts. For most cases, this just + // loads the lvalue formed by the subscript expr. However, we have to be + // careful, because the base of a vector subscript is occasionally an rvalue, + // so we can't get it as an lvalue. + if (!E->getBase()->getType()->isVectorType()) + return EmitLoadOfLValue(E); + + // Handle the vector case. The base must be a vector, the index must be an + // integer value. + Value *Base = Visit(E->getBase()); + Value *Idx = Visit(E->getIdx()); + + // FIXME: Convert Idx to i32 type. + return Builder.CreateExtractElement(Base, Idx, "vecext"); +} + +/// VisitImplicitCastExpr - Implicit casts are the same as normal casts, but +/// also handle things like function to pointer-to-function decay, and array to +/// pointer decay. +Value *ScalarExprEmitter::VisitImplicitCastExpr(const ImplicitCastExpr *E) { + const Expr *Op = E->getSubExpr(); + + // If this is due to array->pointer conversion, emit the array expression as + // an l-value. + if (Op->getType()->isArrayType()) { + // FIXME: For now we assume that all source arrays map to LLVM arrays.
This + // will not be true when we add support for VLAs. + Value *V = EmitLValue(Op).getAddress(); // Bitfields can't be arrays. + + assert(isa<llvm::PointerType>(V->getType()) && + isa<llvm::ArrayType>(cast<llvm::PointerType>(V->getType()) + ->getElementType()) && + "Doesn't support VLAs yet!"); + llvm::Constant *Idx0 = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + + llvm::Value *Ops[] = {Idx0, Idx0}; + V = Builder.CreateGEP(V, Ops, Ops+2, "arraydecay"); + + // The resultant pointer type can be implicitly casted to other pointer + // types as well, for example void*. + const llvm::Type *DestPTy = ConvertType(E->getType()); + assert(isa<llvm::PointerType>(DestPTy) && + "Only expect implicit cast to pointer"); + if (V->getType() != DestPTy) + V = Builder.CreateBitCast(V, DestPTy, "ptrconv"); + return V; + + } else if (E->getType()->isReferenceType()) { + assert(cast<ReferenceType>(E->getType().getCanonicalType())-> + getReferenceeType() == + Op->getType().getCanonicalType() && "Incompatible types!"); + + return EmitLValue(Op).getAddress(); + } + + return EmitCastExpr(Op, E->getType()); +} + + +// EmitCastExpr - Emit code for an explicit or implicit cast. Implicit casts +// have to handle a more broad range of conversions than explicit casts, as they +// handle things like function to ptr-to-function decay etc. +Value *ScalarExprEmitter::EmitCastExpr(const Expr *E, QualType DestTy) { + // Handle cases where the source is a non-complex type. + + if (!CGF.hasAggregateLLVMType(E->getType())) { + Value *Src = Visit(const_cast<Expr*>(E)); + + // Use EmitScalarConversion to perform the conversion. + return EmitScalarConversion(Src, E->getType(), DestTy); + } + + if (E->getType()->isComplexType()) { + // Handle cases where the source is a complex type. + return EmitComplexToScalarConversion(CGF.EmitComplexExpr(E), E->getType(), + DestTy); + } + + // Okay, this is a cast from an aggregate. It must be a cast to void. Just + // evaluate the result and return.
+ CGF.EmitAggExpr(E, 0, false); + return 0; +} + +Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { + return CGF.EmitCompoundStmt(*E->getSubStmt(), true).getScalarVal(); +} + + +//===----------------------------------------------------------------------===// +// Unary Operators +//===----------------------------------------------------------------------===// + +/// VisitPrePostIncDec - Load the operand, add or subtract one, store the new +/// value back through the lvalue, and return the new value for the prefix +/// forms or the originally loaded value for the postfix forms. +Value *ScalarExprEmitter::VisitPrePostIncDec(const UnaryOperator *E, + bool isInc, bool isPre) { + LValue LV = EmitLValue(E->getSubExpr()); + // FIXME: Handle volatile! + Value *InVal = CGF.EmitLoadOfLValue(LV, // false + E->getSubExpr()->getType()).getScalarVal(); + + int AmountVal = isInc ? 1 : -1; + + Value *NextVal; + if (isa<llvm::PointerType>(InVal->getType())) { + // FIXME: This isn't right for VLAs. + NextVal = llvm::ConstantInt::get(llvm::Type::Int32Ty, AmountVal); + NextVal = Builder.CreateGEP(InVal, NextVal); + } else { + // Add the inc/dec to the scalar value. + if (isa<llvm::IntegerType>(InVal->getType())) + NextVal = llvm::ConstantInt::get(InVal->getType(), AmountVal); + else if (InVal->getType() == llvm::Type::FloatTy) + // FIXME: Handle long double. + NextVal = + llvm::ConstantFP::get(InVal->getType(), + llvm::APFloat(static_cast<float>(AmountVal))); + else { + // FIXME: Handle long double. + assert(InVal->getType() == llvm::Type::DoubleTy); + NextVal = + llvm::ConstantFP::get(InVal->getType(), + llvm::APFloat(static_cast<double>(AmountVal))); + } + NextVal = Builder.CreateAdd(InVal, NextVal, isInc ? "inc" : "dec"); + } + + // Store the updated result through the lvalue. + CGF.EmitStoreThroughLValue(RValue::get(NextVal), LV, + E->getSubExpr()->getType()); + + // If this is a post-inc/dec, return the value read from memory, otherwise use + // the updated value. + return isPre ?
NextVal : InVal; +} + + +Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { + Value *Op = Visit(E->getSubExpr()); + return Builder.CreateNeg(Op, "neg"); +} + +Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) { + Value *Op = Visit(E->getSubExpr()); + return Builder.CreateNot(Op, "neg"); +} + +Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { + // Compare operand to zero. + Value *BoolVal = CGF.EvaluateExprAsBool(E->getSubExpr()); + + // Invert value. + // TODO: Could dynamically modify easy computations here. For example, if + // the operand is an icmp ne, turn into icmp eq. + BoolVal = Builder.CreateNot(BoolVal, "lnot"); + + // ZExt result to int. + return Builder.CreateZExt(BoolVal, CGF.LLVMIntTy, "lnot.ext"); +} + +/// EmitSizeAlignOf - Return the size or alignment of the 'TypeToSize' type as +/// an integer (RetType). +Value *ScalarExprEmitter::EmitSizeAlignOf(QualType TypeToSize, + QualType RetType,bool isSizeOf){ + assert(RetType->isIntegerType() && "Result type must be an integer!"); + uint32_t ResultWidth = + static_cast<uint32_t>(CGF.getContext().getTypeSize(RetType)); + + // sizeof(void) and __alignof__(void) = 1 as a gcc extension. + if (TypeToSize->isVoidType()) + return llvm::ConstantInt::get(llvm::APInt(ResultWidth, 1)); + + /// FIXME: This doesn't handle VLAs yet! + std::pair<uint64_t, unsigned> Info = CGF.getContext().getTypeInfo(TypeToSize); + + uint64_t Val = isSizeOf ? Info.first : Info.second; + Val /= 8; // Return size or alignment in bytes, not bits.
+ + return llvm::ConstantInt::get(llvm::APInt(ResultWidth, Val)); +} + +Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) { + Expr *Op = E->getSubExpr(); + if (Op->getType()->isComplexType()) + return CGF.EmitComplexExpr(Op).first; + return Visit(Op); +} +Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) { + Expr *Op = E->getSubExpr(); + if (Op->getType()->isComplexType()) + return CGF.EmitComplexExpr(Op).second; + + // __imag on a scalar returns zero. Emit the subexpr to ensure side + // effects are evaluated. + CGF.EmitScalarExpr(Op); + return llvm::Constant::getNullValue(ConvertType(E->getType())); +} + +Value *ScalarExprEmitter::VisitUnaryOffsetOf(const UnaryOperator *E) +{ + int64_t Val = E->evaluateOffsetOf(CGF.getContext()); + + assert(E->getType()->isIntegerType() && "Result type must be an integer!"); + + uint32_t ResultWidth = + static_cast<uint32_t>(CGF.getContext().getTypeSize(E->getType())); + return llvm::ConstantInt::get(llvm::APInt(ResultWidth, Val)); +} + +//===----------------------------------------------------------------------===// +// Binary Operators +//===----------------------------------------------------------------------===// + +/// EmitBinOps - Evaluate both operands of E and bundle them with the result +/// type and the expression itself. +BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { + BinOpInfo Result; + Result.LHS = Visit(E->getLHS()); + Result.RHS = Visit(E->getRHS()); + Result.Ty = E->getType(); + Result.E = E; + return Result; +} + +/// EmitCompoundAssign - Emit a compound assignment: load the LHS, apply Func in +/// the computation type, convert the result back to the LHS type, store it +/// through the LHS lvalue and return the stored value. +Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, + Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) { + QualType LHSTy = E->getLHS()->getType(), RHSTy = E->getRHS()->getType(); + + BinOpInfo OpInfo; + + // Load the LHS operand; the RHS is emitted below once its kind is known. + LValue LHSLV = EmitLValue(E->getLHS()); + OpInfo.LHS = EmitLoadOfLValue(LHSLV, LHSTy); + + // Determine the computation type. If the RHS is complex, then this is one of + // the add/sub/mul/div operators.
All of these operators can be computed in + // terms of just their real component even though the computation domain really + // is complex. + QualType ComputeType = E->getComputationType(); + + // If the computation type is complex, then the RHS is complex. Emit the RHS. + if (const ComplexType *CT = ComputeType->getAsComplexType()) { + ComputeType = CT->getElementType(); + + // Emit the RHS, only keeping the real component. + OpInfo.RHS = CGF.EmitComplexExpr(E->getRHS()).first; + RHSTy = RHSTy->getAsComplexType()->getElementType(); + } else { + // Otherwise the RHS is a simple scalar value. + OpInfo.RHS = Visit(E->getRHS()); + } + + // Convert the LHS/RHS values to the computation type. + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, ComputeType); + + // Do not merge types for -= or += where the LHS is a pointer. + if (!(E->getOpcode() == BinaryOperator::SubAssign || + E->getOpcode() == BinaryOperator::AddAssign) || + !E->getLHS()->getType()->isPointerType()) { + OpInfo.RHS = EmitScalarConversion(OpInfo.RHS, RHSTy, ComputeType); + } + OpInfo.Ty = ComputeType; + OpInfo.E = E; + + // Expand the binary operator. + Value *Result = (this->*Func)(OpInfo); + + // Truncate the result back to the LHS type. + Result = EmitScalarConversion(Result, ComputeType, LHSTy); + + // Store the result value into the LHS lvalue. + CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV, E->getType()); + + return Result; +} + + +Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { + if (Ops.LHS->getType()->isFPOrFPVector()) + return Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); + else if (Ops.Ty->isUnsignedIntegerType()) + return Builder.CreateUDiv(Ops.LHS, Ops.RHS, "div"); + else + return Builder.CreateSDiv(Ops.LHS, Ops.RHS, "div"); +} + +Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { + // Rem in C can't be a floating point type: C99 6.5.5p2.
+ if (Ops.Ty->isUnsignedIntegerType()) + return Builder.CreateURem(Ops.LHS, Ops.RHS, "rem"); + else + return Builder.CreateSRem(Ops.LHS, Ops.RHS, "rem"); +} + + +/// EmitAdd - Emit an addition. Non-pointer results use a plain add; pointer +/// arithmetic widens the integer operand to pointer width and emits a GEP. +Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &Ops) { + if (!Ops.Ty->isPointerType()) + return Builder.CreateAdd(Ops.LHS, Ops.RHS, "add"); + + // FIXME: What about a pointer to a VLA? + Value *Ptr, *Idx; + Expr *IdxExp; + if (isa<llvm::PointerType>(Ops.LHS->getType())) { // pointer + int + Ptr = Ops.LHS; + Idx = Ops.RHS; + IdxExp = Ops.E->getRHS(); + } else { // int + pointer + Ptr = Ops.RHS; + Idx = Ops.LHS; + IdxExp = Ops.E->getLHS(); + } + + unsigned Width = cast<llvm::IntegerType>(Idx->getType())->getBitWidth(); + if (Width < CGF.LLVMPointerWidth) { + // Zero or sign extend the index value based on whether the index is + // signed or not. + const llvm::Type *IdxType = llvm::IntegerType::get(CGF.LLVMPointerWidth); + if (IdxExp->getType().getCanonicalType()->isSignedIntegerType()) + Idx = Builder.CreateSExt(Idx, IdxType, "idx.ext"); + else + Idx = Builder.CreateZExt(Idx, IdxType, "idx.ext"); + } + + return Builder.CreateGEP(Ptr, Idx, "add.ptr"); +} + +/// EmitSub - Emit a subtraction. Non-pointer operands use a plain sub; for +/// "pointer - int" the index is widened to pointer width, negated, and applied +/// with a GEP. Pointer - pointer is handled by VisitBinSub. +Value *ScalarExprEmitter::EmitSub(const BinOpInfo &Ops) { + if (!isa<llvm::PointerType>(Ops.LHS->getType())) + return Builder.CreateSub(Ops.LHS, Ops.RHS, "sub"); + + // pointer - int + assert(!isa<llvm::PointerType>(Ops.RHS->getType()) && + "ptr-ptr shouldn't get here"); + // FIXME: The pointer could point to a VLA. + Value *Idx = Ops.RHS; + + unsigned Width = cast<llvm::IntegerType>(Idx->getType())->getBitWidth(); + if (Width < CGF.LLVMPointerWidth) { + // Zero or sign extend the index value based on whether the index is + // signed or not.
+ const llvm::Type *IdxType = llvm::IntegerType::get(CGF.LLVMPointerWidth); + if (Ops.E->getRHS()->getType().getCanonicalType()->isSignedIntegerType()) + Idx = Builder.CreateSExt(Idx, IdxType, "idx.ext"); + else + Idx = Builder.CreateZExt(Idx, IdxType, "idx.ext"); + } + + // Negate only after widening. Negating the narrow value first and then zero + // extending it would turn "p - 1u" into "p + 0xFFFFFFFF" on a 64-bit target, + // and a narrow signed minimum would wrap instead of becoming positive. + Idx = Builder.CreateNeg(Idx, "sub.ptr.neg"); + + return Builder.CreateGEP(Ops.LHS, Idx, "sub.ptr"); +} + +Value *ScalarExprEmitter::VisitBinSub(const BinaryOperator *E) { + // "X - Y" is different from "X -= Y" in one case: when Y is a pointer. In + // the compound assignment case it is invalid, so just handle it here. + if (!E->getRHS()->getType()->isPointerType()) + return EmitSub(EmitBinOps(E)); + + // pointer - pointer + Value *LHS = Visit(E->getLHS()); + Value *RHS = Visit(E->getRHS()); + + const QualType LHSType = E->getLHS()->getType().getCanonicalType(); + const QualType LHSElementType = cast<PointerType>(LHSType)->getPointeeType(); + uint64_t ElementSize = CGF.getContext().getTypeSize(LHSElementType) / 8; + + const llvm::Type *ResultType = ConvertType(E->getType()); + LHS = Builder.CreatePtrToInt(LHS, ResultType, "sub.ptr.lhs.cast"); + RHS = Builder.CreatePtrToInt(RHS, ResultType, "sub.ptr.rhs.cast"); + Value *BytesBetween = Builder.CreateSub(LHS, RHS, "sub.ptr.sub"); + + // HACK: LLVM doesn't have a divide instruction that 'knows' there is no + // remainder. As such, we handle common power-of-two cases here to generate + // better code. + if (llvm::isPowerOf2_64(ElementSize)) { + Value *ShAmt = + llvm::ConstantInt::get(ResultType, llvm::Log2_64(ElementSize)); + return Builder.CreateAShr(BytesBetween, ShAmt, "sub.ptr.shr"); + } + + // Otherwise, do a full sdiv. + Value *BytesPerElt = llvm::ConstantInt::get(ResultType, ElementSize); + return Builder.CreateSDiv(BytesBetween, BytesPerElt, "sub.ptr.div"); +} + + +Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { + // LLVM requires the LHS and RHS to be the same type: promote or truncate the + // RHS to the same size as the LHS.
+ Value *RHS = Ops.RHS; + if (Ops.LHS->getType() != RHS->getType()) + RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); + + return Builder.CreateShl(Ops.LHS, RHS, "shl"); +} + +Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { + // LLVM requires the LHS and RHS to be the same type: promote or truncate the + // RHS to the same size as the LHS. + Value *RHS = Ops.RHS; + if (Ops.LHS->getType() != RHS->getType()) + RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); + + if (Ops.Ty->isUnsignedIntegerType()) + return Builder.CreateLShr(Ops.LHS, RHS, "shr"); + return Builder.CreateAShr(Ops.LHS, RHS, "shr"); +} + +Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,unsigned UICmpOpc, + unsigned SICmpOpc, unsigned FCmpOpc) { + Value *Result; + QualType LHSTy = E->getLHS()->getType(); + if (!LHSTy->isComplexType()) { + Value *LHS = Visit(E->getLHS()); + Value *RHS = Visit(E->getRHS()); + + if (LHS->getType()->isFloatingPoint()) { + Result = Builder.CreateFCmp((llvm::FCmpInst::Predicate)FCmpOpc, + LHS, RHS, "cmp"); + } else if (LHSTy->isUnsignedIntegerType()) { + Result = Builder.CreateICmp((llvm::ICmpInst::Predicate)UICmpOpc, + LHS, RHS, "cmp"); + } else { + // Signed integers and pointers. + Result = Builder.CreateICmp((llvm::ICmpInst::Predicate)SICmpOpc, + LHS, RHS, "cmp"); + } + } else { + // Complex Comparison: can only be an equality comparison. + CodeGenFunction::ComplexPairTy LHS = CGF.EmitComplexExpr(E->getLHS()); + CodeGenFunction::ComplexPairTy RHS = CGF.EmitComplexExpr(E->getRHS()); + + QualType CETy = + cast<ComplexType>(LHSTy.getCanonicalType())->getElementType(); + + Value *ResultR, *ResultI; + if (CETy->isRealFloatingType()) { + ResultR = Builder.CreateFCmp((llvm::FCmpInst::Predicate)FCmpOpc, + LHS.first, RHS.first, "cmp.r"); + ResultI = Builder.CreateFCmp((llvm::FCmpInst::Predicate)FCmpOpc, + LHS.second, RHS.second, "cmp.i"); + } else { + // Complex comparisons can only be equality comparisons. 
As such, signed + // and unsigned opcodes are the same. + ResultR = Builder.CreateICmp((llvm::ICmpInst::Predicate)UICmpOpc, + LHS.first, RHS.first, "cmp.r"); + ResultI = Builder.CreateICmp((llvm::ICmpInst::Predicate)UICmpOpc, + LHS.second, RHS.second, "cmp.i"); + } + + if (E->getOpcode() == BinaryOperator::EQ) { + Result = Builder.CreateAnd(ResultR, ResultI, "and.ri"); + } else { + assert(E->getOpcode() == BinaryOperator::NE && + "Complex comparison other than == or != ?"); + Result = Builder.CreateOr(ResultR, ResultI, "or.ri"); + } + } + + // ZExt result to int. + return Builder.CreateZExt(Result, CGF.LLVMIntTy, "cmp.ext"); +} + +Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { + LValue LHS = EmitLValue(E->getLHS()); + Value *RHS = Visit(E->getRHS()); + + // Store the value into the LHS. + // FIXME: Volatility! + CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS, E->getType()); + + // Return the RHS. + return RHS; +} + +Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { + Value *LHSCond = CGF.EvaluateExprAsBool(E->getLHS()); + + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("land_cont"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("land_rhs"); + + llvm::BasicBlock *OrigBlock = Builder.GetInsertBlock(); + Builder.CreateCondBr(LHSCond, RHSBlock, ContBlock); + + CGF.EmitBlock(RHSBlock); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); + + // Reaquire the RHS block, as there may be subblocks inserted. + RHSBlock = Builder.GetInsertBlock(); + CGF.EmitBlock(ContBlock); + + // Create a PHI node. If we just evaluted the LHS condition, the result is + // false. If we evaluated both, the result is the RHS condition. + llvm::PHINode *PN = Builder.CreatePHI(llvm::Type::Int1Ty, "land"); + PN->reserveOperandSpace(2); + PN->addIncoming(llvm::ConstantInt::getFalse(), OrigBlock); + PN->addIncoming(RHSCond, RHSBlock); + + // ZExt result to int. 
+ return Builder.CreateZExt(PN, CGF.LLVMIntTy, "land.ext"); +} + +Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { + Value *LHSCond = CGF.EvaluateExprAsBool(E->getLHS()); + + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("lor_cont"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("lor_rhs"); + + llvm::BasicBlock *OrigBlock = Builder.GetInsertBlock(); + Builder.CreateCondBr(LHSCond, ContBlock, RHSBlock); + + CGF.EmitBlock(RHSBlock); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); + + // Reacquire the RHS block, as there may be subblocks inserted. + RHSBlock = Builder.GetInsertBlock(); + CGF.EmitBlock(ContBlock); + + // Create a PHI node. If we just evaluated the LHS condition, the result is + // true. If we evaluated both, the result is the RHS condition. + llvm::PHINode *PN = Builder.CreatePHI(llvm::Type::Int1Ty, "lor"); + PN->reserveOperandSpace(2); + PN->addIncoming(llvm::ConstantInt::getTrue(), OrigBlock); + PN->addIncoming(RHSCond, RHSBlock); + + // ZExt result to int. + return Builder.CreateZExt(PN, CGF.LLVMIntTy, "lor.ext"); +} + +Value *ScalarExprEmitter::VisitBinComma(const BinaryOperator *E) { + CGF.EmitStmt(E->getLHS()); + return Visit(E->getRHS()); +} + +//===----------------------------------------------------------------------===// +// Other Operators +//===----------------------------------------------------------------------===// + +Value *ScalarExprEmitter:: +VisitConditionalOperator(const ConditionalOperator *E) { + llvm::BasicBlock *LHSBlock = new llvm::BasicBlock("cond.?"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("cond.:"); + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("cond.cont"); + + // Evaluate the conditional, then convert it to bool. We do this explicitly + // because we need the unconverted value if this is a GNU ?: expression with + // missing middle value.
+ Value *CondVal = CGF.EmitScalarExpr(E->getCond()); + Value *CondBoolVal =CGF.EmitScalarConversion(CondVal, E->getCond()->getType(), + CGF.getContext().BoolTy); + Builder.CreateCondBr(CondBoolVal, LHSBlock, RHSBlock); + + CGF.EmitBlock(LHSBlock); + + // Handle the GNU extension for missing LHS. + Value *LHS; + if (E->getLHS()) + LHS = Visit(E->getLHS()); + else // Perform promotions, to handle cases like "short ?: int" + LHS = EmitScalarConversion(CondVal, E->getCond()->getType(), E->getType()); + + Builder.CreateBr(ContBlock); + // Re-read the insertion block: emitting the arm may have added blocks. + LHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(RHSBlock); + + Value *RHS = Visit(E->getRHS()); + Builder.CreateBr(ContBlock); + RHSBlock = Builder.GetInsertBlock(); + + CGF.EmitBlock(ContBlock); + + if (!LHS) { + assert(E->getType()->isVoidType() && "Non-void value should have a value"); + return 0; + } + + // Create a PHI node merging the values produced by the two arms. + llvm::PHINode *PN = Builder.CreatePHI(LHS->getType(), "cond"); + PN->reserveOperandSpace(2); + PN->addIncoming(LHS, LHSBlock); + PN->addIncoming(RHS, RHSBlock); + return PN; +} + +Value *ScalarExprEmitter::VisitChooseExpr(ChooseExpr *E) { + // Emit the LHS or RHS as appropriate. + return + Visit(E->isConditionTrue(CGF.getContext()) ?
E->getLHS() : E->getRHS()); +} + +Value *ScalarExprEmitter::VisitOverloadExpr(OverloadExpr *E) { + return CGF.EmitCallExpr(E->getFn(), E->arg_begin(), + E->getNumArgs(CGF.getContext())).getScalarVal(); +} + +Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { + llvm::Value *ArgValue = EmitLValue(VE->getSubExpr()).getAddress(); + + llvm::Value *V = Builder.CreateVAArg(ArgValue, ConvertType(VE->getType())); + return V; +} + +Value *ScalarExprEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { + std::string str; + llvm::SmallVector<const RecordType *, 8> EncodingRecordTypes; + CGF.getContext().getObjCEncodingForType(E->getEncodedType(), str, + EncodingRecordTypes); + + llvm::Constant *C = llvm::ConstantArray::get(str); + C = new llvm::GlobalVariable(C->getType(), true, + llvm::GlobalValue::InternalLinkage, + C, ".str", &CGF.CGM.getModule()); + llvm::Constant *Zero = llvm::Constant::getNullValue(llvm::Type::Int32Ty); + llvm::Constant *Zeros[] = { Zero, Zero }; + C = llvm::ConstantExpr::getGetElementPtr(C, Zeros, 2); + + return C; +} + +//===----------------------------------------------------------------------===// +// Entry Point into this File +//===----------------------------------------------------------------------===// + +/// EmitScalarExpr - Emit the computation of the specified expression of LLVM +/// scalar type, returning the result. The expression must be non-null and must +/// not have an aggregate LLVM type. +Value *CodeGenFunction::EmitScalarExpr(const Expr *E) { + assert(E && !hasAggregateLLVMType(E->getType()) && + "Invalid scalar expression to emit"); + + return ScalarExprEmitter(*this).Visit(const_cast<Expr*>(E)); +} + +/// EmitScalarConversion - Emit a conversion from the specified type to the +/// specified destination type, both of which are LLVM scalar types.
+Value *CodeGenFunction::EmitScalarConversion(Value *Src, QualType SrcTy, + QualType DstTy) { + assert(!hasAggregateLLVMType(SrcTy) && !hasAggregateLLVMType(DstTy) && + "Invalid scalar expression to emit"); + return ScalarExprEmitter(*this).EmitScalarConversion(Src, SrcTy, DstTy); +} + +/// EmitComplexToScalarConversion - Emit a conversion from the specified +/// complex type to the specified destination type, where the destination +/// type is an LLVM scalar type. +Value *CodeGenFunction::EmitComplexToScalarConversion(ComplexPairTy Src, + QualType SrcTy, + QualType DstTy) { + assert(SrcTy->isComplexType() && !hasAggregateLLVMType(DstTy) && + "Invalid complex -> scalar conversion"); + return ScalarExprEmitter(*this).EmitComplexToScalarConversion(Src, SrcTy, + DstTy); +} + +/// EmitShuffleVector - Emit a shufflevector of V1 and V2, which must have the +/// same vector type. The variadic arguments are one int mask index per element +/// of V1 (each in the range [0, 2 * NumElements)), followed by a const char * +/// that names the result. +Value *CodeGenFunction::EmitShuffleVector(Value* V1, Value *V2, ...) { + assert(V1->getType() == V2->getType() && + "Vector operands must be of the same type"); + + unsigned NumElements = + cast<llvm::VectorType>(V1->getType())->getNumElements(); + + va_list va; + va_start(va, V2); + + llvm::SmallVector<llvm::Constant*, 16> Args; + + for (unsigned i = 0; i < NumElements; i++) { + int n = va_arg(va, int); + + assert(n >= 0 && n < (int)NumElements * 2 && + "Vector shuffle index out of bounds!"); + + Args.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, n)); + } + + const char *Name = va_arg(va, const char *); + va_end(va); + + llvm::Constant *Mask = llvm::ConstantVector::get(&Args[0], NumElements); + + return Builder.CreateShuffleVector(V1, V2, Mask, Name); +} + +/// EmitVector - Build a vector of NumVals elements by inserting values one at a +/// time into an undef vector. When isSplat is set every element is Vals[0], +/// otherwise element i is Vals[i]. Vals[0] is read unconditionally to pick the +/// element type. +llvm::Value *CodeGenFunction::EmitVector(llvm::Value * const *Vals, + unsigned NumVals, bool isSplat) +{ + llvm::Value *Vec + = llvm::UndefValue::get(llvm::VectorType::get(Vals[0]->getType(), NumVals)); + + for (unsigned i = 0, e = NumVals ; i != e; ++i) { + llvm::Value *Val = isSplat ?
Vals[0] : Vals[i]; + llvm::Value *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, i); + Vec = Builder.CreateInsertElement(Vec, Val, Idx, "tmp"); + } + + return Vec; +} diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp new file mode 100644 index 00000000000..33419a3d51d --- /dev/null +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -0,0 +1,25 @@ +//===---- CGObjC.cpp - Emit LLVM Code for Objective-C --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Objective-C code as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/Expr.h" +#include "llvm/Constant.h" +using namespace clang; +using namespace CodeGen; + +llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E){ + std::string S(E->getString()->getStrData(), E->getString()->getByteLength()); + return CGM.GetAddrOfConstantCFString(S); +} + diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp new file mode 100644 index 00000000000..f0d6f554d55 --- /dev/null +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -0,0 +1,97 @@ +//===------- CGObjCGNU.cpp - Emit LLVM Code from ASTs for a Module --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides Objective-C code generation targeting the GNU runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGObjCRuntime.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/LLVMBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace clang::CodeGen;
+using namespace clang;
+
+// Out-of-line definition of the base class' virtual destructor.
+CGObjCRuntime::~CGObjCRuntime() {}
+
+namespace {
+// CGObjCRuntime implementation for the GNU runtime. It only holds a reference
+// to the module into which the runtime functions are declared.
+class CGObjCGNU : public CGObjCRuntime {
+private:
+  llvm::Module &TheModule;
+public:
+  CGObjCGNU(llvm::Module &M) : TheModule(M) {};
+  virtual llvm::Value *generateMessageSend(llvm::LLVMFoldingBuilder &Builder,
+                                           const llvm::Type *ReturnTy,
+                                           llvm::Value *Receiver,
+                                           llvm::Constant *Selector,
+                                           llvm::Value** ArgV,
+                                           unsigned ArgC);
+};
+} // end anonymous namespace
+
+// Generate code for a message send expression on the GNU runtime.
+// BIG FAT WARNING: Much of this code will need factoring out later.
+// FIXME: This currently only handles id returns. Other return types
+// need some explicit casting.
+//
+// The emitted sequence is:
+//   1. cmd = sel_get_uid(<pointer to the first element of Selector>)
+//   2. imp = objc_msg_lookup(Receiver, cmd)
+//   3. result = imp(Receiver, cmd, ArgV[0] ... ArgV[ArgC-1])
+// and the value of the final call is returned.
+llvm::Value *CGObjCGNU::generateMessageSend(llvm::LLVMFoldingBuilder &Builder,
+                                            const llvm::Type *ReturnTy,
+                                            llvm::Value *Receiver,
+                                            llvm::Constant *Selector,
+                                            llvm::Value** ArgV,
+                                            unsigned ArgC) {
+  // Get the selector Type.
+  // It is modelled as a pointer to a struct of two i8* fields.
+  const llvm::Type *PtrToInt8Ty =
+    llvm::PointerType::getUnqual(llvm::Type::Int8Ty);
+  std::vector<const llvm::Type*> Str2(2, PtrToInt8Ty);
+  const llvm::Type *SelStructTy = llvm::StructType::get(Str2);
+  const llvm::Type *SelTy = llvm::PointerType::getUnqual(SelStructTy);
+
+  // Look up the selector.
+  // If we haven't got the selector lookup function, look it up now.
+  // TODO: Factor this out and use it to implement @selector() too.
+  llvm::Constant *SelFunction =
+    TheModule.getOrInsertFunction("sel_get_uid", SelTy, PtrToInt8Ty, NULL);
+  // FIXME: Selectors should be statically cached, not looked up on every call.
+
+  // TODO: Pull this out into the caller.
+  // GEP with two zero indices over Selector; the result is passed as the
+  // single argument of sel_get_uid.
+  llvm::Constant *Idx0 = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0);
+  llvm::Constant *Ops[] = {Idx0, Idx0};
+  llvm::Value *SelStr = llvm::ConstantExpr::getGetElementPtr(Selector, Ops, 2);
+  llvm::Value *cmd = Builder.CreateCall(SelFunction, &SelStr, &SelStr+1);
+
+  // Look up the method implementation.
+  std::vector<const llvm::Type*> impArgTypes;
+  impArgTypes.push_back(Receiver->getType());
+  impArgTypes.push_back(SelTy);
+
+  // Avoid an explicit cast on the IMP by getting a version that has the right
+  // return type.
+  // The IMP type is variadic: (receiver, selector, ...) -> ReturnTy.
+  llvm::FunctionType *impType = llvm::FunctionType::get(ReturnTy, impArgTypes,
+                                                        true);
+
+  llvm::Constant *lookupFunction =
+     TheModule.getOrInsertFunction("objc_msg_lookup",
+                                   llvm::PointerType::get(impType, 0),
+                                   Receiver->getType(), SelTy, NULL);
+  llvm::SmallVector<llvm::Value*, 16> lookupArgs;
+  lookupArgs.push_back(Receiver);
+  lookupArgs.push_back(cmd);
+  llvm::Value *imp = Builder.CreateCall(lookupFunction,
+      lookupArgs.begin(), lookupArgs.end());
+
+  // Call the method.
+  // The same argument vector is reused: (Receiver, cmd) followed by the
+  // caller-supplied arguments.
+  lookupArgs.insert(lookupArgs.end(), ArgV, ArgV+ArgC);
+  return Builder.CreateCall(imp, lookupArgs.begin(), lookupArgs.end());
+}
+
+// Factory for the runtime implementation defined in this file. The result is
+// allocated with new; NOTE(review): ownership is presumably the caller's -
+// confirm at the call site.
+CGObjCRuntime * clang::CodeGen::CreateObjCRuntime(llvm::Module &M) {
+  return new CGObjCGNU(M);
+}
diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h
new file mode 100644
index 00000000000..d8fc60f99c0
--- /dev/null
+++ b/clang/lib/CodeGen/CGObjCRuntime.h
@@ -0,0 +1,47 @@
+//===----- CGObjCRuntime.h - Emit LLVM Code from ASTs for a Module --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides an abstract class for Objective-C code generation. Concrete
+// subclasses of this implement code generation for specific Objective-C
+// runtime libraries.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE(review): the guard macro spells "OBCJ" rather than "OBJC"; it is used
+// consistently in both directives below, so it still works as a guard.
+#ifndef CLANG_CODEGEN_OBCJRUNTIME_H
+#define CLANG_CODEGEN_OBCJRUNTIME_H
+
+// Forward declarations so that this header does not need the LLVM headers.
+namespace llvm {
+  class LLVMFoldingBuilder;
+  class Constant;
+  class Type;
+  class Value;
+  class Module;
+}
+
+namespace clang {
+namespace CodeGen {
+
+// Implements runtime-specific code generation functions
+class CGObjCRuntime {
+public:
+  virtual ~CGObjCRuntime();
+
+  // Generate an Objective-C message send operation
+  // Builder is the IR builder to emit with and ReturnTy the LLVM type of the
+  // send's result. NOTE(review): ArgV/ArgC are presumably the message
+  // arguments and their count - confirm against the implementations.
+  virtual llvm::Value *generateMessageSend(llvm::LLVMFoldingBuilder &Builder,
+                                           const llvm::Type *ReturnTy,
+                                           llvm::Value *Receiver,
+                                           llvm::Constant *Selector,
+                                           llvm::Value** ArgV,
+                                           unsigned ArgC) = 0;
+};
+
+// Factory returning a concrete runtime implementation for module M.
+CGObjCRuntime *CreateObjCRuntime(llvm::Module &M);
+}
+}
+#endif
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
new file mode 100644
index 00000000000..5fdc61b46f8
--- /dev/null
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -0,0 +1,776 @@
+//===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit Stmt nodes as LLVM code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "clang/AST/AST.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace clang;
+using namespace CodeGen;
+
+//===----------------------------------------------------------------------===//
+// Statement Emission
+//===----------------------------------------------------------------------===//
+
+/// EmitStmt - Dispatch on the statement class to the matching Emit*Stmt
+/// routine. Anything not listed is treated as an expression statement; a
+/// non-expression that is not listed is reported through WarnUnsupported.
+void CodeGenFunction::EmitStmt(const Stmt *S) {
+  assert(S && "Null statement?");
+
+  switch (S->getStmtClass()) {
+  default:
+    // Must be an expression in a stmt context. Emit the value (to get
+    // side-effects) and ignore the result.
+    if (const Expr *E = dyn_cast<Expr>(S)) {
+      if (!hasAggregateLLVMType(E->getType()))
+        EmitScalarExpr(E);
+      else if (E->getType()->isComplexType())
+        EmitComplexExpr(E);
+      else
+        EmitAggExpr(E, 0, false);
+    } else {
+      WarnUnsupported(S, "statement");
+    }
+    break;
+  case Stmt::NullStmtClass: break;
+  case Stmt::CompoundStmtClass: EmitCompoundStmt(cast<CompoundStmt>(*S)); break;
+  case Stmt::LabelStmtClass: EmitLabelStmt(cast<LabelStmt>(*S)); break;
+  case Stmt::GotoStmtClass: EmitGotoStmt(cast<GotoStmt>(*S)); break;
+
+  case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break;
+  case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S)); break;
+  case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S)); break;
+  case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S)); break;
+
+  case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break;
+  case Stmt::DeclStmtClass: EmitDeclStmt(cast<DeclStmt>(*S)); break;
+
+  case Stmt::BreakStmtClass: EmitBreakStmt(); break;
+  case Stmt::ContinueStmtClass: EmitContinueStmt(); break;
+  case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break;
+  case Stmt::DefaultStmtClass: EmitDefaultStmt(cast<DefaultStmt>(*S)); break;
+  case Stmt::CaseStmtClass: EmitCaseStmt(cast<CaseStmt>(*S)); break;
+  case Stmt::AsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break;
+  }
+}
+
+/// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true,
+/// this captures the expression result of the last sub-statement and returns it
+/// (for use by the statement expression extension).
+RValue CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S, bool GetLast,
+                                         llvm::Value *AggLoc, bool isAggVol) {
+  // FIXME: handle vla's etc.
+  // GetLast only applies when there is a trailing expression to capture.
+  if (S.body_empty() || !isa<Expr>(S.body_back())) GetLast = false;
+
+  // When GetLast is set the final statement is skipped here (body_end()-1) and
+  // emitted as an expression below.
+  for (CompoundStmt::const_body_iterator I = S.body_begin(),
+       E = S.body_end()-GetLast; I != E; ++I)
+    EmitStmt(*I);
+
+
+  if (!GetLast)
+    return RValue::get(0);
+
+  // NOTE(review): isAggVol is not forwarded to EmitAnyExpr here.
+  return EmitAnyExpr(cast<Expr>(S.body_back()), AggLoc);
+}
+
+/// EmitBlock - Append BB to the current function and make it the insertion
+/// point, linking it to the block that was being emitted before.
+void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB) {
+  // Emit a branch from this block to the next one if this was a real block. If
+  // this was just a fall-through block after a terminator, don't emit it.
+  llvm::BasicBlock *LastBB = Builder.GetInsertBlock();
+
+  if (LastBB->getTerminator()) {
+    // If the previous block is already terminated, don't touch it.
+  } else if (LastBB->empty() && LastBB->getValueName() == 0) {
+    // If the last block was an empty placeholder, remove it now.
+    // TODO: cache and reuse these.
+    Builder.GetInsertBlock()->eraseFromParent();
+  } else {
+    // Otherwise, create a fall-through branch.
+    Builder.CreateBr(BB);
+  }
+  CurFn->getBasicBlockList().push_back(BB);
+  Builder.SetInsertPoint(BB);
+}
+
+/// EmitLabelStmt - Start the block associated with the label, then emit the
+/// labelled sub-statement into it.
+void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
+  llvm::BasicBlock *NextBB = getBasicBlockForLabel(&S);
+
+  EmitBlock(NextBB);
+  EmitStmt(S.getSubStmt());
+}
+
+/// EmitGotoStmt - Branch to the block associated with the target label.
+void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
+  Builder.CreateBr(getBasicBlockForLabel(S.getLabel()));
+
+  // Emit a block after the branch so that dead code after a goto has some place
+  // to go.
+  Builder.SetInsertPoint(new llvm::BasicBlock("", CurFn));
+}
+
+/// EmitIfStmt - Emit the condition, the 'then' block, the optional 'else'
+/// block and the continuation block.
+void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
+  // C99 6.8.4.1: The first substatement is executed if the expression compares
+  // unequal to 0. The condition must be a scalar type.
+  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+
+  llvm::BasicBlock *ContBlock = new llvm::BasicBlock("ifend");
+  llvm::BasicBlock *ThenBlock = new llvm::BasicBlock("ifthen");
+  llvm::BasicBlock *ElseBlock = ContBlock;
+
+  if (S.getElse())
+    ElseBlock = new llvm::BasicBlock("ifelse");
+
+  // Insert the conditional branch.
+  Builder.CreateCondBr(BoolCondVal, ThenBlock, ElseBlock);
+
+  // Emit the 'then' code.
+  EmitBlock(ThenBlock);
+  EmitStmt(S.getThen());
+  llvm::BasicBlock *BB = Builder.GetInsertBlock();
+  if (isDummyBlock(BB)) {
+    BB->eraseFromParent();
+    Builder.SetInsertPoint(ThenBlock);
+  }
+  else
+    Builder.CreateBr(ContBlock);
+
+  // Emit the 'else' code if present.
+  if (const Stmt *Else = S.getElse()) {
+    EmitBlock(ElseBlock);
+    EmitStmt(Else);
+    llvm::BasicBlock *BB = Builder.GetInsertBlock();
+    if (isDummyBlock(BB)) {
+      BB->eraseFromParent();
+      Builder.SetInsertPoint(ElseBlock);
+    }
+    else
+      Builder.CreateBr(ContBlock);
+  }
+
+  // Emit the continuation block for code after the if.
+  EmitBlock(ContBlock);
+}
+
+/// EmitWhileStmt - Emit a while loop as header (condition), body and exit
+/// blocks. break targets the exit block and continue targets the header.
+void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
+  // Emit the header for the loop, insert it, which will create an uncond br to
+  // it.
+  llvm::BasicBlock *LoopHeader = new llvm::BasicBlock("whilecond");
+  EmitBlock(LoopHeader);
+
+  // Evaluate the conditional in the while header. C99 6.8.5.1: The evaluation
+  // of the controlling expression takes place before each execution of the loop
+  // body.
+  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+
+  // while(1) is common, avoid extra exit blocks. Be sure
+  // to correctly handle break/continue though.
+  bool EmitBoolCondBranch = true;
+  if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
+    if (C->isOne())
+      EmitBoolCondBranch = false;
+
+  // Create an exit block for when the condition fails, create a block for the
+  // body of the loop.
+  llvm::BasicBlock *ExitBlock = new llvm::BasicBlock("whileexit");
+  llvm::BasicBlock *LoopBody = new llvm::BasicBlock("whilebody");
+
+  // As long as the condition is true, go to the loop body.
+  if (EmitBoolCondBranch)
+    Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
+
+  // Store the blocks to use for break and continue.
+  BreakContinueStack.push_back(BreakContinue(ExitBlock, LoopHeader));
+
+  // Emit the loop body.
+  EmitBlock(LoopBody);
+  EmitStmt(S.getBody());
+
+  BreakContinueStack.pop_back();
+
+  // Cycle to the condition.
+  Builder.CreateBr(LoopHeader);
+
+  // Emit the exit block.
+  EmitBlock(ExitBlock);
+
+  // If LoopHeader is a simple forwarding block then eliminate it.
+  if (!EmitBoolCondBranch
+      && &LoopHeader->front() == LoopHeader->getTerminator()) {
+    LoopHeader->replaceAllUsesWith(LoopBody);
+    LoopHeader->getTerminator()->eraseFromParent();
+    LoopHeader->eraseFromParent();
+  }
+}
+
+/// EmitDoStmt - Emit a do/while loop as body, condition and exit blocks.
+/// break targets the exit block and continue targets the condition block.
+void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
+  // Emit the body for the loop, insert it, which will create an uncond br to
+  // it.
+  llvm::BasicBlock *LoopBody = new llvm::BasicBlock("dobody");
+  llvm::BasicBlock *AfterDo = new llvm::BasicBlock("afterdo");
+  EmitBlock(LoopBody);
+
+  llvm::BasicBlock *DoCond = new llvm::BasicBlock("docond");
+
+  // Store the blocks to use for break and continue.
+  BreakContinueStack.push_back(BreakContinue(AfterDo, DoCond));
+
+  // Emit the body of the loop into the block.
+  EmitStmt(S.getBody());
+
+  BreakContinueStack.pop_back();
+
+  EmitBlock(DoCond);
+
+  // C99 6.8.5.2: "The evaluation of the controlling expression takes place
+  // after each execution of the loop body."
+
+  // Evaluate the conditional in the while header.
+  // C99 6.8.5p2/p4: The first substatement is executed if the expression
+  // compares unequal to 0. The condition must be a scalar type.
+  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+
+  // "do {} while (0)" is common in macros, avoid extra blocks. Be sure
+  // to correctly handle break/continue though.
+  bool EmitBoolCondBranch = true;
+  if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
+    if (C->isZero())
+      EmitBoolCondBranch = false;
+
+  // As long as the condition is true, iterate the loop.
+  if (EmitBoolCondBranch)
+    Builder.CreateCondBr(BoolCondVal, LoopBody, AfterDo);
+
+  // Emit the exit block.
+  EmitBlock(AfterDo);
+
+  // If DoCond is a simple forwarding block then eliminate it.
+  if (!EmitBoolCondBranch && &DoCond->front() == DoCond->getTerminator()) {
+    DoCond->replaceAllUsesWith(AfterDo);
+    DoCond->getTerminator()->eraseFromParent();
+    DoCond->eraseFromParent();
+  }
+}
+
+/// EmitForStmt - Emit a for loop: init, condition block, body, optional
+/// increment block, and the block following the loop.
+void CodeGenFunction::EmitForStmt(const ForStmt &S) {
+  // FIXME: What do we do if the increment (f.e.) contains a stmt expression,
+  // which contains a continue/break?
+  // TODO: We could keep track of whether the loop body contains any
+  // break/continue statements and not create unnecessary blocks (like
+  // "afterfor" for a condless loop) if it doesn't.
+
+  // Evaluate the first part before the loop.
+  if (S.getInit())
+    EmitStmt(S.getInit());
+
+  // Start the loop with a block that tests the condition.
+  llvm::BasicBlock *CondBlock = new llvm::BasicBlock("forcond");
+  llvm::BasicBlock *AfterFor = new llvm::BasicBlock("afterfor");
+
+  EmitBlock(CondBlock);
+
+  // Evaluate the condition if present. If not, treat it as a non-zero-constant
+  // according to 6.8.5.3p2, aka, true.
+  if (S.getCond()) {
+    // C99 6.8.5p2/p4: The first substatement is executed if the expression
+    // compares unequal to 0. The condition must be a scalar type.
+    llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
+
+    // As long as the condition is true, iterate the loop.
+    llvm::BasicBlock *ForBody = new llvm::BasicBlock("forbody");
+    Builder.CreateCondBr(BoolCondVal, ForBody, AfterFor);
+    EmitBlock(ForBody);
+  } else {
+    // Treat it as a non-zero constant. Don't even create a new block for the
+    // body, just fall into it.
+  }
+
+  // If the for loop doesn't have an increment we can just use the
+  // condition as the continue block.
+  llvm::BasicBlock *ContinueBlock;
+  if (S.getInc())
+    ContinueBlock = new llvm::BasicBlock("forinc");
+  else
+    ContinueBlock = CondBlock;
+
+  // Store the blocks to use for break and continue.
+  BreakContinueStack.push_back(BreakContinue(AfterFor, ContinueBlock));
+
+  // If the condition is true, execute the body of the for stmt.
+  EmitStmt(S.getBody());
+
+  BreakContinueStack.pop_back();
+
+  if (S.getInc())
+    EmitBlock(ContinueBlock);
+
+  // If there is an increment, emit it next.
+  if (S.getInc())
+    EmitStmt(S.getInc());
+
+  // Finally, branch back up to the condition for the next iteration.
+  Builder.CreateBr(CondBlock);
+
+  // Emit the fall-through block.
+  EmitBlock(AfterFor);
+}
+
+/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
+/// if the function returns void, or may be missing one if the function returns
+/// non-void. Fun stuff :).
+///
+/// Every path through this function terminates the current block with a
+/// return instruction before the trailing placeholder block is started, so
+/// statements that follow a 'return' are never reached.
+void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
+  // Emit the result value, even if unused, to evaluate the side effects.
+  const Expr *RV = S.getRetValue();
+
+  QualType FnRetTy = CurFuncDecl->getType().getCanonicalType();
+  FnRetTy = cast<FunctionType>(FnRetTy)->getResultType();
+
+  if (FnRetTy->isVoidType()) {
+    // "return expr;" in a function returning void: the operand still has to
+    // be emitted for its side effects. EmitStmt evaluates an expression
+    // statement and discards the result.
+    if (RV)
+      EmitStmt(RV);
+    // If the function returns void, emit ret void.
+    Builder.CreateRetVoid();
+  } else if (RV == 0) {
+    // Handle "return;" in a function that returns a value.
+    const llvm::Type *RetTy = CurFn->getFunctionType()->getReturnType();
+    if (RetTy == llvm::Type::VoidTy)
+      Builder.CreateRetVoid(); // struct return etc.
+    else
+      Builder.CreateRet(llvm::UndefValue::get(RetTy));
+  } else if (!hasAggregateLLVMType(RV->getType())) {
+    Builder.CreateRet(EmitScalarExpr(RV));
+  } else if (RV->getType()->isComplexType()) {
+    // The value is stored through the first LLVM argument (the struct return
+    // slot). The block must then be terminated, otherwise EmitBlock below
+    // would add a fall-through branch into the code after the return.
+    llvm::Value *SRetPtr = CurFn->arg_begin();
+    EmitComplexExprIntoAddr(RV, SRetPtr, false);
+    Builder.CreateRetVoid();
+  } else {
+    // Same as the complex case, for other aggregates.
+    llvm::Value *SRetPtr = CurFn->arg_begin();
+    EmitAggExpr(RV, SRetPtr, false);
+    Builder.CreateRetVoid();
+  }
+
+  // Emit a block after the branch so that dead code after a return has some
+  // place to go.
+  EmitBlock(new llvm::BasicBlock());
+}
+
+/// EmitDeclStmt - Emit every declarator chained off the statement's first
+/// declaration.
+void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
+  for (const ScopedDecl *Decl = S.getDecl(); Decl;
+       Decl = Decl->getNextDeclarator())
+    EmitDecl(*Decl);
+}
+
+/// EmitBreakStmt - Branch to the innermost break target and start a fresh
+/// block for any code that follows.
+void CodeGenFunction::EmitBreakStmt() {
+  assert(!BreakContinueStack.empty() && "break stmt not in a loop or switch!");
+
+  llvm::BasicBlock *Block = BreakContinueStack.back().BreakBlock;
+  Builder.CreateBr(Block);
+  EmitBlock(new llvm::BasicBlock());
+}
+
+/// EmitContinueStmt - Branch to the innermost continue target and start a
+/// fresh block for any code that follows.
+void CodeGenFunction::EmitContinueStmt() {
+  assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
+
+  llvm::BasicBlock *Block = BreakContinueStack.back().ContinueBlock;
+  Builder.CreateBr(Block);
+  EmitBlock(new llvm::BasicBlock());
+}
+
+/// EmitCaseStmtRange - If case statement range is not too big then
+/// add multiple cases to switch instruction, one for each value within
+/// the range. If range is too big then emit "if" condition check.
+void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
+  assert (S.getRHS() && "Unexpected RHS value in CaseStmt");
+
+  const Expr *L = S.getLHS();
+  const Expr *R = S.getRHS();
+  llvm::ConstantInt *LV = cast<llvm::ConstantInt>(EmitScalarExpr(L));
+  llvm::ConstantInt *RV = cast<llvm::ConstantInt>(EmitScalarExpr(R));
+  llvm::APInt LHS = LV->getValue();
+  const llvm::APInt &RHS = RV->getValue();
+
+  // NOTE(review): Range is computed with wrapping arithmetic, so a reversed
+  // range (RHS below LHS) shows up as a very large value and takes the
+  // range-check path below. Presumably Sema rejects such ranges - confirm.
+  llvm::APInt Range = RHS - LHS;
+  if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
+    // Range is small enough to add multiple switch instruction cases.
+    StartBlock("sw.bb");
+    llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+    SwitchInsn->addCase(LV, CaseDest);
+    // A single-value range ("case 5 ... 5:") is fully covered by LV. Only
+    // walk the remaining values when the bounds differ; otherwise stepping
+    // LHS past RHS would make the loop run until the value wraps around.
+    if (LHS != RHS) {
+      LHS++;
+      while (LHS != RHS) {
+        SwitchInsn->addCase(llvm::ConstantInt::get(LHS), CaseDest);
+        LHS++;
+      }
+      SwitchInsn->addCase(RV, CaseDest);
+    }
+    EmitStmt(S.getSubStmt());
+    return;
+  }
+
+  // The range is too big. Emit "if" condition.
+  llvm::BasicBlock *FalseDest = NULL;
+  llvm::BasicBlock *CaseDest = new llvm::BasicBlock("sw.bb");
+
+  // If we have already seen one case statement range for this switch
+  // instruction then piggy-back otherwise use default block as false
+  // destination.
+  if (CaseRangeBlock)
+    FalseDest = CaseRangeBlock;
+  else
+    FalseDest = SwitchInsn->getDefaultDest();
+
+  // Start new block to hold case statement range check instructions.
+  StartBlock("case.range");
+  CaseRangeBlock = Builder.GetInsertBlock();
+
+  // Emit range check.
+  // (Cond - LHS) <=u (RHS - LHS) tests both bounds with one comparison.
+  llvm::Value *Diff =
+    Builder.CreateSub(SwitchInsn->getCondition(), LV, "tmp");
+  llvm::Value *Cond =
+    Builder.CreateICmpULE(Diff, llvm::ConstantInt::get(Range), "tmp");
+  Builder.CreateCondBr(Cond, CaseDest, FalseDest);
+
+  // Now emit case statement body.
+  EmitBlock(CaseDest);
+  EmitStmt(S.getSubStmt());
+}
+
+/// EmitCaseStmt - Add a case for the statement's value to the enclosing
+/// switch instruction and emit the sub-statement. Case ranges are handed to
+/// EmitCaseStmtRange.
+void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
+  if (S.getRHS()) {
+    EmitCaseStmtRange(S);
+    return;
+  }
+
+  StartBlock("sw.bb");
+  llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+  llvm::APSInt CaseVal(32);
+  S.getLHS()->isIntegerConstantExpr(CaseVal, getContext());
+  llvm::ConstantInt *LV = llvm::ConstantInt::get(CaseVal);
+  SwitchInsn->addCase(LV, CaseDest);
+  EmitStmt(S.getSubStmt());
+}
+
+/// EmitDefaultStmt - Make a new block the default destination of the
+/// enclosing switch instruction and emit the sub-statement into it.
+void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) {
+  StartBlock("sw.default");
+  // Current insert block is the default destination.
+  SwitchInsn->setSuccessor(0, Builder.GetInsertBlock());
+  EmitStmt(S.getSubStmt());
+}
+
+/// EmitSwitchStmt - Emit the switch instruction and its body. The state of
+/// any enclosing switch (SwitchInsn, CaseRangeBlock) is saved on entry and
+/// restored on exit.
+void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
+  llvm::Value *CondV = EmitScalarExpr(S.getCond());
+
+  // Handle nested switch statements.
+  llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
+  llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
+  CaseRangeBlock = NULL;
+
+  // Create basic block to hold stuff that comes after switch statement.
+  // Initially use it to hold DefaultStmt.
+  llvm::BasicBlock *NextBlock = new llvm::BasicBlock("after.sw");
+  SwitchInsn = Builder.CreateSwitch(CondV, NextBlock);
+
+  // All break statements jump to NextBlock. If BreakContinueStack is non empty
+  // then reuse last ContinueBlock.
+  llvm::BasicBlock *ContinueBlock = NULL;
+  if (!BreakContinueStack.empty())
+    ContinueBlock = BreakContinueStack.back().ContinueBlock;
+  BreakContinueStack.push_back(BreakContinue(NextBlock, ContinueBlock));
+
+  // Emit switch body.
+  EmitStmt(S.getBody());
+  BreakContinueStack.pop_back();
+
+  // If one or more case statement range is seen then use CaseRangeBlock
+  // as the default block. False edge of CaseRangeBlock will lead to
+  // original default block.
+  if (CaseRangeBlock)
+    SwitchInsn->setSuccessor(0, CaseRangeBlock);
+
+  // Prune insert block if it is dummy.
+  llvm::BasicBlock *BB = Builder.GetInsertBlock();
+  if (isDummyBlock(BB))
+    BB->eraseFromParent();
+  else // Otherwise, branch to continuation.
+    Builder.CreateBr(NextBlock);
+
+  // Place NextBlock as the new insert point.
+  CurFn->getBasicBlockList().push_back(NextBlock);
+  Builder.SetInsertPoint(NextBlock);
+  SwitchInsn = SavedSwitchInsn;
+  CaseRangeBlock = SavedCRBlock;
+}
+
+/// ConvertAsmString - Rewrite a GCC-style asm template into the form used for
+/// the LLVM inline asm string:
+///   '$'         becomes "$$"
+///   "%%"        becomes '%'
+///   "%="        becomes a number that is bumped on every call
+///   "%<n>"      becomes "$<n>"
+///   "%<c><n>"   becomes "${<n>:<c>}" for a letter <c>
+/// For a simple asm (IsSimple) only the '$' rewrite is applied. NumOperands is
+/// only used to assert that operand numbers are in range.
+static inline std::string ConvertAsmString(const char *Start,
+                                           unsigned NumOperands,
+                                           bool IsSimple)
+{
+  static unsigned AsmCounter = 0;
+
+  AsmCounter++;
+
+  std::string Result;
+  if (IsSimple) {
+    while (*Start) {
+      switch (*Start) {
+      default:
+        Result += *Start;
+        break;
+      case '$':
+        Result += "$$";
+        break;
+      }
+
+      Start++;
+    }
+
+    return Result;
+  }
+
+  while (*Start) {
+    switch (*Start) {
+    default:
+      Result += *Start;
+      break;
+    case '$':
+      Result += "$$";
+      break;
+    case '%':
+      // Escaped character
+      Start++;
+      if (!*Start) {
+        // FIXME: This should be caught during Sema.
+        assert(0 && "Trailing '%' in asm string.");
+        // Without assertions, stop here: continuing would advance Start past
+        // the terminating NUL and read beyond the end of the string.
+        return Result;
+      }
+
+      char EscapedChar = *Start;
+      if (EscapedChar == '%') {
+        // Escaped percentage sign.
+        Result += '%';
+      }
+      else if (EscapedChar == '=') {
+        // Generate a unique ID.
+        Result += llvm::utostr(AsmCounter);
+      } else if (isdigit(EscapedChar)) {
+        // %n - Assembler operand n
+        char *End;
+
+        unsigned long n = strtoul(Start, &End, 10);
+        if (Start == End) {
+          // FIXME: This should be caught during Sema.
+          assert(0 && "Missing operand!");
+        } else if (n >= NumOperands) {
+          // FIXME: This should be caught during Sema.
+          assert(0 && "Operand number out of range!");
+        }
+
+        Result += '$' + llvm::utostr(n);
+        Start = End - 1;
+      } else if (isalpha(EscapedChar)) {
+        // %<letter>n - Assembler operand n with a modifier letter.
+        char *End;
+
+        // The number is parsed starting after the modifier letter, so "no
+        // digits consumed" means End was left at Start + 1.
+        unsigned long n = strtoul(Start + 1, &End, 10);
+        if (Start + 1 == End) {
+          // FIXME: This should be caught during Sema.
+          assert(0 && "Missing operand!");
+        } else if (n >= NumOperands) {
+          // FIXME: This should be caught during Sema.
+          assert(0 && "Operand number out of range!");
+        }
+
+        Result += "${" + llvm::utostr(n) + ':' + EscapedChar + '}';
+        Start = End - 1;
+      } else {
+        assert(0 && "Unhandled asm escaped character!");
+      }
+    }
+    Start++;
+  }
+
+  return Result;
+}
+
+/// SimplifyConstraint - Rewrite a GCC constraint string character by
+/// character: '*', '?' and '!' are dropped, 'g' becomes "imr", and every other
+/// character is passed through Target.convertConstraint.
+static std::string SimplifyConstraint(const char* Constraint,
+                                      TargetInfo &Target) {
+  std::string Result;
+
+  while (*Constraint) {
+    switch (*Constraint) {
+    default:
+      Result += Target.convertConstraint(*Constraint);
+      break;
+    // Ignore these
+    case '*':
+    case '?':
+    case '!':
+      break;
+    case 'g':
+      Result += "imr";
+      break;
+    }
+
+    Constraint++;
+  }
+
+  return Result;
+}
+
+/// EmitAsmStmt - Emit an inline asm statement as a call to an llvm::InlineAsm
+/// value. The constraint string is assembled in this order: outputs, inputs,
+/// the input halves of read-write outputs, clobbers, target clobbers. The
+/// call arguments follow the same order.
+void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
+  std::string AsmString =
+    ConvertAsmString(std::string(S.getAsmString()->getStrData(),
+                                 S.getAsmString()->getByteLength()).c_str(),
+                     S.getNumOutputs() + S.getNumInputs(), S.isSimple());
+
+  std::string Constraints;
+
+  llvm::Value *ResultAddr = 0;
+  const llvm::Type *ResultType = llvm::Type::VoidTy;
+
+  std::vector<const llvm::Type*> ArgTypes;
+  std::vector<llvm::Value*> Args;
+
+  // Keep track of inout constraints.
+  std::string InOutConstraints;
+  std::vector<llvm::Value*> InOutArgs;
+  std::vector<const llvm::Type*> InOutArgTypes;
+
+  for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
+    std::string OutputConstraint(S.getOutputConstraint(i)->getStrData(),
+                                 S.getOutputConstraint(i)->getByteLength());
+
+    TargetInfo::ConstraintInfo Info;
+    bool result = Target.validateOutputConstraint(OutputConstraint.c_str(),
+                                                  Info);
+    assert(result && "Failed to parse output constraint");
+
+    // Simplify the output constraint.
+    // The first character of the constraint is skipped (the "+ 1").
+    OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1, Target);
+
+    LValue Dest = EmitLValue(S.getOutputExpr(i));
+    const llvm::Type *DestValueType =
+      cast<llvm::PointerType>(Dest.getAddress()->getType())->getElementType();
+
+    // If the first output operand is not a memory dest, we'll
+    // make it the return value.
+    if (i == 0 && !(Info & TargetInfo::CI_AllowsMemory) &&
+        DestValueType->isFirstClassType()) {
+      ResultAddr = Dest.getAddress();
+      ResultType = DestValueType;
+      Constraints += "=" + OutputConstraint;
+    } else {
+      // Every other output is passed by address as an indirect ("=*")
+      // operand.
+      ArgTypes.push_back(Dest.getAddress()->getType());
+      Args.push_back(Dest.getAddress());
+      if (i != 0)
+        Constraints += ',';
+      Constraints += "=*";
+      Constraints += OutputConstraint;
+    }
+
+    if (Info & TargetInfo::CI_ReadWrite) {
+      // FIXME: This code should be shared with the code that handles inputs.
+      InOutConstraints += ',';
+
+      const Expr *InputExpr = S.getOutputExpr(i);
+      llvm::Value *Arg;
+      if ((Info & TargetInfo::CI_AllowsRegister) ||
+          !(Info & TargetInfo::CI_AllowsMemory)) {
+        if (ConvertType(InputExpr->getType())->isFirstClassType()) {
+          Arg = EmitScalarExpr(InputExpr);
+        } else {
+          assert(0 && "FIXME: Implement passing non first class types as inputs");
+        }
+      } else {
+        LValue Dest = EmitLValue(InputExpr);
+        Arg = Dest.getAddress();
+        InOutConstraints += '*';
+      }
+
+      InOutArgTypes.push_back(Arg->getType());
+      InOutArgs.push_back(Arg);
+      InOutConstraints += OutputConstraint;
+    }
+  }
+
+  unsigned NumConstraints = S.getNumOutputs() + S.getNumInputs();
+
+  for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
+    const Expr *InputExpr = S.getInputExpr(i);
+
+    std::string InputConstraint(S.getInputConstraint(i)->getStrData(),
+                                S.getInputConstraint(i)->getByteLength());
+
+    TargetInfo::ConstraintInfo Info;
+    bool result = Target.validateInputConstraint(InputConstraint.c_str(),
+                                                 NumConstraints,
+                                                 Info);
+    assert(result && "Failed to parse input constraint");
+
+    if (i != 0 || S.getNumOutputs() > 0)
+      Constraints += ',';
+
+    // Simplify the input constraint.
+    InputConstraint = SimplifyConstraint(InputConstraint.c_str(), Target);
+
+    llvm::Value *Arg;
+
+    if ((Info & TargetInfo::CI_AllowsRegister) ||
+        !(Info & TargetInfo::CI_AllowsMemory)) {
+      if (ConvertType(InputExpr->getType())->isFirstClassType()) {
+        Arg = EmitScalarExpr(InputExpr);
+      } else {
+        assert(0 && "FIXME: Implement passing non first class types as inputs");
+      }
+    } else {
+      LValue Dest = EmitLValue(InputExpr);
+      Arg = Dest.getAddress();
+      Constraints += '*';
+    }
+
+    ArgTypes.push_back(Arg->getType());
+    Args.push_back(Arg);
+    Constraints += InputConstraint;
+  }
+
+  // Append the "input" part of inout constraints last.
+  for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
+    ArgTypes.push_back(InOutArgTypes[i]);
+    Args.push_back(InOutArgs[i]);
+  }
+  Constraints += InOutConstraints;
+
+  // Clobbers
+  for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
+    std::string Clobber(S.getClobber(i)->getStrData(),
+                        S.getClobber(i)->getByteLength());
+
+    Clobber = Target.getNormalizedGCCRegisterName(Clobber.c_str());
+
+    if (i != 0 || NumConstraints != 0)
+      Constraints += ',';
+
+    Constraints += "~{";
+    Constraints += Clobber;
+    Constraints += '}';
+  }
+
+  // Add machine specific clobbers
+  if (const char *C = Target.getClobbers()) {
+    if (!Constraints.empty())
+      Constraints += ',';
+    Constraints += C;
+  }
+
+  const llvm::FunctionType *FTy =
+    llvm::FunctionType::get(ResultType, ArgTypes, false);
+
+  // An asm with no outputs is always marked as having side effects.
+  llvm::InlineAsm *IA =
+    llvm::InlineAsm::get(FTy, AsmString, Constraints,
+                         S.isVolatile() || S.getNumOutputs() == 0);
+  llvm::Value *Result = Builder.CreateCall(IA, Args.begin(), Args.end(), "");
+  // Store the returned value into the first output, if it was promoted to the
+  // return value above.
+  if (ResultAddr)
+    Builder.CreateStore(Result, ResultAddr);
+}
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
new file mode 100644
index 00000000000..49f23f27c34
--- /dev/null
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -0,0 +1,182 @@
+//===--- CodeGenFunction.cpp - Emit LLVM Code from ASTs for a Function ----===//
+// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This coordinates the per-function state used while generating code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/AST/AST.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/CFG.h" +using namespace clang; +using namespace CodeGen; + +CodeGenFunction::CodeGenFunction(CodeGenModule &cgm) + : CGM(cgm), Target(CGM.getContext().Target), SwitchInsn(NULL), + CaseRangeBlock(NULL) {} + +ASTContext &CodeGenFunction::getContext() const { + return CGM.getContext(); +} + + +llvm::BasicBlock *CodeGenFunction::getBasicBlockForLabel(const LabelStmt *S) { + llvm::BasicBlock *&BB = LabelMap[S]; + if (BB) return BB; + + // Create, but don't insert, the new block. 
/// GenerateCode - Emit the LLVM function body for the function definition
/// \p FD.
///
/// The steps, in order, are: cache the LLVM 'int' type and the size of
/// 'void*', look up the llvm::Function for FD, translate declaration
/// attributes into linkage / calling convention / visibility / function
/// attributes, create the entry block together with the alloca insertion
/// marker, bind the formal parameters, emit the body, terminate any block
/// that falls off the end, and finally drop the marker instruction.
void CodeGenFunction::GenerateCode(const FunctionDecl *FD) {
  LLVMIntTy = ConvertType(getContext().IntTy);
  LLVMPointerWidth = static_cast<unsigned>(
    getContext().getTypeSize(getContext().getPointerType(getContext().VoidTy)));

  CurFuncDecl = FD;
  // Passing 'true' marks this request as coming from a definition.
  CurFn = cast<llvm::Function>(CGM.GetAddrOfFunctionDecl(FD, true));
  assert(CurFn->isDeclaration() && "Function already has body?");

  // TODO: Set up linkage and many other things.  Note, this is a simple
  // approximation of what we really want.
  // The first matching case wins: dllimport, dllexport, weak/inline, static.
  // If none matches the linkage is left untouched.
  if (FD->getAttr<DLLImportAttr>())
    CurFn->setLinkage(llvm::Function::DLLImportLinkage);
  else if (FD->getAttr<DLLExportAttr>())
    CurFn->setLinkage(llvm::Function::DLLExportLinkage);
  else if (FD->getAttr<WeakAttr>() || FD->isInline())
    CurFn->setLinkage(llvm::Function::WeakLinkage);
  else if (FD->getStorageClass() == FunctionDecl::Static)
    CurFn->setLinkage(llvm::Function::InternalLinkage);

  if (FD->getAttr<FastCallAttr>())
    CurFn->setCallingConv(llvm::CallingConv::Fast);

  if (const VisibilityAttr *attr = FD->getAttr<VisibilityAttr>())
    CurFn->setVisibility(attr->getVisibility());
  // FIXME: else handle -fvisibility


  // Collect function-level attributes; they are attached at index 0 below.
  unsigned FuncAttrs = 0;
  if (FD->getAttr<NoThrowAttr>())
    FuncAttrs |= llvm::ParamAttr::NoUnwind;
  if (FD->getAttr<NoReturnAttr>())
    FuncAttrs |= llvm::ParamAttr::NoReturn;

  if (FuncAttrs) {
    llvm::ParamAttrsWithIndex PAWI =
      llvm::ParamAttrsWithIndex::get(0, FuncAttrs);
    CurFn->setParamAttrs(llvm::PAListPtr::get(&PAWI, 1));
  }

  llvm::BasicBlock *EntryBB = new llvm::BasicBlock("entry", CurFn);

  // Create a marker to make it easy to insert allocas into the entryblock
  // later.  Don't create this with the builder, because we don't want it
  // folded.
  llvm::Value *Undef = llvm::UndefValue::get(llvm::Type::Int32Ty);
  AllocaInsertPt = new llvm::BitCastInst(Undef, llvm::Type::Int32Ty, "allocapt",
                                         EntryBB);

  Builder.SetInsertPoint(EntryBB);

  // Emit allocs for param decls.  Give the LLVM Argument nodes names.
  llvm::Function::arg_iterator AI = CurFn->arg_begin();

  // Name the struct return argument.  When the result type is an aggregate
  // the first LLVM argument is consumed here, so the user-visible parameters
  // start at the following one.
  if (hasAggregateLLVMType(FD->getResultType())) {
    AI->setName("agg.result");
    ++AI;
  }

  for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i, ++AI) {
    assert(AI != CurFn->arg_end() && "Argument mismatch!");
    EmitParmDecl(*FD->getParamDecl(i), AI);
  }

  // Emit the function body.
  EmitStmt(FD->getBody());

  // Emit a return for code that falls off the end. If insert point
  // is a dummy block with no predecessors then remove the block itself.
  llvm::BasicBlock *BB = Builder.GetInsertBlock();
  if (isDummyBlock(BB))
    BB->eraseFromParent();
  else {
    // FIXME: if this is C++ main, this should return 0.
    // Falling off the end of a non-void function yields an undef result.
    if (CurFn->getReturnType() == llvm::Type::VoidTy)
      Builder.CreateRetVoid();
    else
      Builder.CreateRet(llvm::UndefValue::get(CurFn->getReturnType()));
  }
  assert(BreakContinueStack.empty() &&
         "mismatched push/pop in break/continue stack!");

  // Remove the AllocaInsertPt instruction, which is just a convenience for us.
  AllocaInsertPt->eraseFromParent();
  AllocaInsertPt = 0;

  // Verify that the function is well formed.  NOTE: the call sits inside
  // assert(), so the verifier only runs in builds where assertions are
  // enabled.
  assert(!verifyFunction(*CurFn));
}
void CodeGenFunction::WarnUnsupported(const Stmt *S, const char *Type) {
  // Forward to the module, which reports the "cannot codegen this %0 yet"
  // warning at S's location; Type is the text substituted for %0.
  CGM.WarnUnsupported(S, Type);
}
class ParmVarDecl; + class FieldDecl; +namespace CodeGen { + class CodeGenModule; + class CodeGenTypes; + class CGRecordLayout; + +/// RValue - This trivial value class is used to represent the result of an +/// expression that is evaluated. It can be one of three things: either a +/// simple LLVM SSA value, a pair of SSA values for complex numbers, or the +/// address of an aggregate value in memory. +class RValue { + llvm::Value *V1, *V2; + // TODO: Encode this into the low bit of pointer for more efficient + // return-by-value. + enum { Scalar, Complex, Aggregate } Flavor; + + // FIXME: Aggregate rvalues need to retain information about whether they are + // volatile or not. +public: + + bool isScalar() const { return Flavor == Scalar; } + bool isComplex() const { return Flavor == Complex; } + bool isAggregate() const { return Flavor == Aggregate; } + + /// getScalar() - Return the Value* of this scalar value. + llvm::Value *getScalarVal() const { + assert(isScalar() && "Not a scalar!"); + return V1; + } + + /// getComplexVal - Return the real/imag components of this complex value. + /// + std::pair<llvm::Value *, llvm::Value *> getComplexVal() const { + return std::pair<llvm::Value *, llvm::Value *>(V1, V2); + } + + /// getAggregateAddr() - Return the Value* of the address of the aggregate. 
+ llvm::Value *getAggregateAddr() const { + assert(isAggregate() && "Not an aggregate!"); + return V1; + } + + static RValue get(llvm::Value *V) { + RValue ER; + ER.V1 = V; + ER.Flavor = Scalar; + return ER; + } + static RValue getComplex(llvm::Value *V1, llvm::Value *V2) { + RValue ER; + ER.V1 = V1; + ER.V2 = V2; + ER.Flavor = Complex; + return ER; + } + static RValue getComplex(const std::pair<llvm::Value *, llvm::Value *> &C) { + RValue ER; + ER.V1 = C.first; + ER.V2 = C.second; + ER.Flavor = Complex; + return ER; + } + static RValue getAggregate(llvm::Value *V) { + RValue ER; + ER.V1 = V; + ER.Flavor = Aggregate; + return ER; + } +}; + + +/// LValue - This represents an lvalue references. Because C/C++ allow +/// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a +/// bitrange. +class LValue { + // FIXME: Volatility. Restrict? + // alignment? + + enum { + Simple, // This is a normal l-value, use getAddress(). + VectorElt, // This is a vector element l-value (V[i]), use getVector* + BitField, // This is a bitfield l-value, use getBitfield*. + OCUVectorElt // This is an ocu vector subset, use getOCUVectorComp + } LVType; + + llvm::Value *V; + + union { + llvm::Value *VectorIdx; // Index into a vector subscript: V[i] + unsigned VectorElts; // Encoded OCUVector element subset: V.xyx + struct { + unsigned short StartBit; + unsigned short Size; + bool IsSigned; + } BitfieldData; // BitField start bit and size + }; +public: + bool isSimple() const { return LVType == Simple; } + bool isVectorElt() const { return LVType == VectorElt; } + bool isBitfield() const { return LVType == BitField; } + bool isOCUVectorElt() const { return LVType == OCUVectorElt; } + + // simple lvalue + llvm::Value *getAddress() const { assert(isSimple()); return V; } + // vector elt lvalue + llvm::Value *getVectorAddr() const { assert(isVectorElt()); return V; } + llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; } + // ocu vector elements. 
/// CodeGenFunction - This class organizes the per-function state that is used
/// while generating LLVM code.
class CodeGenFunction {
public:
  CodeGenModule &CGM;  // Per-module state.
  TargetInfo &Target;  // Taken from CGM's ASTContext by the constructor.

  typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy;
  llvm::LLVMFoldingBuilder Builder;

  // The declaration and the LLVM function currently being emitted.  Both are
  // assigned at the start of GenerateCode.
  const FunctionDecl *CurFuncDecl;
  llvm::Function *CurFn;

  /// AllocaInsertPt - This is an instruction in the entry block before which
  /// we prefer to insert allocas.  GenerateCode creates it right after the
  /// entry block and erases it again once the body has been emitted.
  llvm::Instruction *AllocaInsertPt;

  // Cached by GenerateCode: the LLVM type of the AST 'int' type, and the size
  // of 'void*' as reported by ASTContext::getTypeSize.
  const llvm::Type *LLVMIntTy;
  uint32_t LLVMPointerWidth;

private:
  /// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C
  /// decls.
  llvm::DenseMap<const Decl*, llvm::Value*> LocalDeclMap;

  /// LabelMap - This keeps track of the LLVM basic block for each C label.
  llvm::DenseMap<const LabelStmt*, llvm::BasicBlock*> LabelMap;

  // BreakContinueStack - This keeps track of where break and continue
  // statements should jump to.
  struct BreakContinue {
    BreakContinue(llvm::BasicBlock *bb, llvm::BasicBlock *cb)
      : BreakBlock(bb), ContinueBlock(cb) {}

    llvm::BasicBlock *BreakBlock;
    llvm::BasicBlock *ContinueBlock;
  };
  llvm::SmallVector<BreakContinue, 8> BreakContinueStack;

  /// SwitchInsn - This is the nearest enclosing switch instruction.  It is
  /// null if the current context is not in a switch.
  llvm::SwitchInst *SwitchInsn;

  /// CaseRangeBlock - This block holds the condition check for the last case
  /// statement range in the current switch instruction.
  llvm::BasicBlock *CaseRangeBlock;

public:
  CodeGenFunction(CodeGenModule &cgm);

  /// getContext - Return the ASTContext of the owning module.
  ASTContext &getContext() const;

  /// GenerateCode - Emit the LLVM body for the function definition FD.
  void GenerateCode(const FunctionDecl *FD);

  /// ConvertType - Convert the AST type T to an LLVM type using the module's
  /// type converter.
  const llvm::Type *ConvertType(QualType T);

  /// hasAggregateLLVMType - Return true if the specified AST type will map into
  /// an aggregate LLVM type.  As defined in CodeGenFunction.cpp this is any
  /// type that is not a real, pointer, reference, void, vector or function
  /// type; note that void therefore yields false.
  static bool hasAggregateLLVMType(QualType T);

  /// getBasicBlockForLabel - Return the LLVM basicblock that the specified
  /// label maps to.  The block is created (but not inserted) on first request.
  llvm::BasicBlock *getBasicBlockForLabel(const LabelStmt *S);


  void EmitBlock(llvm::BasicBlock *BB);

  /// WarnUnsupported - Print out a warning that codegen doesn't support the
  /// specified stmt yet.
  void WarnUnsupported(const Stmt *S, const char *Type);

  //===--------------------------------------------------------------------===//
  //                                  Helpers
  //===--------------------------------------------------------------------===//

  /// CreateTempAlloca - This creates an alloca and inserts it into the entry
  /// block.
  llvm::AllocaInst *CreateTempAlloca(const llvm::Type *Ty,
                                     const char *Name = "tmp");

  /// EvaluateExprAsBool - Perform the usual unary conversions on the specified
  /// expression and compare the result against zero, returning an Int1Ty value.
  llvm::Value *EvaluateExprAsBool(const Expr *E);

  /// EmitAnyExpr - Emit code to compute the specified expression which can have
  /// any type.  The result is returned as an RValue struct.  If this is an
  /// aggregate expression, the aggloc/agglocvolatile arguments indicate where
  /// the result should be returned.
  RValue EmitAnyExpr(const Expr *E, llvm::Value *AggLoc = 0,
                     bool isAggLocVolatile = false);

  /// isDummyBlock - Return true if BB is an empty basic block
  /// with no predecessors.
  static bool isDummyBlock(const llvm::BasicBlock *BB);

  /// StartBlock - Start new block named N. If insert block is a dummy block
  /// then reuse it.
  void StartBlock(const char *N);

  /// getCGRecordLayout - Return record layout info.  RTy must be a record
  /// type; the definition asserts on anything else.
  const CGRecordLayout *getCGRecordLayout(CodeGenTypes &CGT, QualType RTy);

  /// GetAddrOfStaticLocalVar - Return the address of a static local variable.
  llvm::Constant *GetAddrOfStaticLocalVar(const BlockVarDecl *BVD);
  //===--------------------------------------------------------------------===//
  //                            Declaration Emission
  //===--------------------------------------------------------------------===//

  void EmitDecl(const Decl &D);
  void EmitEnumConstantDecl(const EnumConstantDecl &D);
  void EmitBlockVarDecl(const BlockVarDecl &D);
  void EmitLocalBlockVarDecl(const BlockVarDecl &D);
  void EmitStaticBlockVarDecl(const BlockVarDecl &D);
  void EmitParmDecl(const ParmVarDecl &D, llvm::Value *Arg);

  //===--------------------------------------------------------------------===//
  //                             Statement Emission
  //===--------------------------------------------------------------------===//

  void EmitStmt(const Stmt *S);
  RValue EmitCompoundStmt(const CompoundStmt &S, bool GetLast = false,
                          llvm::Value *AggLoc = 0, bool isAggVol = false);
  void EmitLabelStmt(const LabelStmt &S);
  void EmitGotoStmt(const GotoStmt &S);
  void EmitIfStmt(const IfStmt &S);
  void EmitWhileStmt(const WhileStmt &S);
  void EmitDoStmt(const DoStmt &S);
  void EmitForStmt(const ForStmt &S);
  void EmitReturnStmt(const ReturnStmt &S);
  void EmitDeclStmt(const DeclStmt &S);
  void EmitBreakStmt();
  void EmitContinueStmt();
  void EmitSwitchStmt(const SwitchStmt &S);
  void EmitDefaultStmt(const DefaultStmt &S);
  void EmitCaseStmt(const CaseStmt &S);
  void EmitCaseStmtRange(const CaseStmt &S);
  void EmitAsmStmt(const AsmStmt &S);

  //===--------------------------------------------------------------------===//
  //                         LValue Expression Emission
  //===--------------------------------------------------------------------===//

  /// EmitLValue - Emit code to compute a designator that specifies the location
  /// of the expression.
  ///
  /// This can return one of two things: a simple address or a bitfield
  /// reference.  In either case, the LLVM Value* in the LValue structure is
  /// guaranteed to be an LLVM pointer type.
  /// NOTE(review): the LValue class also has vector-element and OCU-vector
  /// kinds; confirm whether "two things" is still accurate.
  ///
  /// If this returns a bitfield reference, nothing about the pointee type of
  /// the LLVM value is known: For example, it may not be a pointer to an
  /// integer.
  ///
  /// If this returns a normal address, and if the lvalue's C type is fixed
  /// size, this method guarantees that the returned pointer type will point to
  /// an LLVM type of the same size of the lvalue's type.  If the lvalue has a
  /// variable length type, this is not possible.
  ///
  LValue EmitLValue(const Expr *E);

  /// EmitLoadOfLValue - Given an expression that represents a value lvalue,
  /// this method emits the address of the lvalue, then loads the result as an
  /// rvalue, returning the rvalue.
  RValue EmitLoadOfLValue(LValue V, QualType LVType);
  RValue EmitLoadOfOCUElementLValue(LValue V, QualType LVType);
  RValue EmitLoadOfBitfieldLValue(LValue LV, QualType ExprType);


  /// EmitStoreThroughLValue - Store the specified rvalue into the specified
  /// lvalue, where both are guaranteed to have the same type, and that type
  /// is 'Ty'.
  void EmitStoreThroughLValue(RValue Src, LValue Dst, QualType Ty);
  void EmitStoreThroughOCUComponentLValue(RValue Src, LValue Dst, QualType Ty);
  void EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, QualType Ty);

  // Note: only available for agg return types
  LValue EmitCallExprLValue(const CallExpr *E);

  LValue EmitDeclRefLValue(const DeclRefExpr *E);
  LValue EmitStringLiteralLValue(const StringLiteral *E);
  LValue EmitPreDefinedLValue(const PreDefinedExpr *E);
  LValue EmitUnaryOpLValue(const UnaryOperator *E);
  LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E);
  LValue EmitOCUVectorElementExpr(const OCUVectorElementExpr *E);
  LValue EmitMemberExpr(const MemberExpr *E);

  LValue EmitLValueForField(llvm::Value* Base, FieldDecl* Field,
                            bool isUnion);

  //===--------------------------------------------------------------------===//
  //                         Scalar Expression Emission
  //===--------------------------------------------------------------------===//

  RValue EmitCallExpr(const CallExpr *E);
  RValue EmitCallExpr(Expr *FnExpr, Expr *const *Args, unsigned NumArgs);
  RValue EmitCallExpr(llvm::Value *Callee, QualType FnType,
                      Expr *const *Args, unsigned NumArgs);
  RValue EmitBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

  llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
  llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

  llvm::Value *EmitShuffleVector(llvm::Value* V1, llvm::Value *V2, ...);
  llvm::Value *EmitVector(llvm::Value * const *Vals, unsigned NumVals,
                          bool isSplat = false);

  llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);

  //===--------------------------------------------------------------------===//
  //                           Expression Emission
  //===--------------------------------------------------------------------===//

  // Expressions are broken into three classes: scalar, complex, aggregate.

  /// EmitScalarExpr - Emit the computation of the specified expression of
  /// LLVM scalar type, returning the result.
  llvm::Value *EmitScalarExpr(const Expr *E);

  /// EmitScalarConversion - Emit a conversion from the specified type to the
  /// specified destination type, both of which are LLVM scalar types.
  llvm::Value *EmitScalarConversion(llvm::Value *Src, QualType SrcTy,
                                    QualType DstTy);

  /// EmitComplexToScalarConversion - Emit a conversion from the specified
  /// complex type to the specified destination type, where the destination
  /// type is an LLVM scalar type.
  llvm::Value *EmitComplexToScalarConversion(ComplexPairTy Src, QualType SrcTy,
                                             QualType DstTy);


  /// EmitAggExpr - Emit the computation of the specified expression of
  /// aggregate type.  The result is computed into DestPtr.  Note that if
  /// DestPtr is null, the value of the aggregate expression is not needed.
  void EmitAggExpr(const Expr *E, llvm::Value *DestPtr, bool VolatileDest);

  /// EmitComplexExpr - Emit the computation of the specified expression of
  /// complex type, returning the result.
  ComplexPairTy EmitComplexExpr(const Expr *E);

  /// EmitComplexExprIntoAddr - Emit the computation of the specified expression
  /// of complex type, storing into the specified Value*.
  void EmitComplexExprIntoAddr(const Expr *E, llvm::Value *DestAddr,
                               bool DestIsVolatile);
  /// LoadComplexFromAddr - Load a complex number from the specified address.
  ComplexPairTy LoadComplexFromAddr(llvm::Value *SrcAddr, bool SrcIsVolatile);
};
+void CodeGenModule::WarnUnsupported(const Stmt *S, const char *Type) { + unsigned DiagID = getDiags().getCustomDiagID(Diagnostic::Warning, + "cannot codegen this %0 yet"); + SourceRange Range = S->getSourceRange(); + std::string Msg = Type; + getDiags().Report(Context.getFullLoc(S->getLocStart()), DiagID, + &Msg, 1, &Range, 1); +} + +/// WarnUnsupported - Print out a warning that codegen doesn't support the +/// specified decl yet. +void CodeGenModule::WarnUnsupported(const Decl *D, const char *Type) { + unsigned DiagID = getDiags().getCustomDiagID(Diagnostic::Warning, + "cannot codegen this %0 yet"); + std::string Msg = Type; + getDiags().Report(Context.getFullLoc(D->getLocation()), DiagID, + &Msg, 1); +} + +/// AddGlobalCtor - Add a function to the list that will be called before +/// main() runs. +void CodeGenModule::AddGlobalCtor(llvm::Function * Ctor) { + // TODO: Type coercion of void()* types. + GlobalCtors.push_back(Ctor); +} + +void CodeGenModule::EmitGlobalCtors() { + // Get the type of @llvm.global_ctors + std::vector<const llvm::Type*> CtorFields; + CtorFields.push_back(llvm::IntegerType::get(32)); + // Constructor function type + std::vector<const llvm::Type*> VoidArgs; + llvm::FunctionType* CtorFuncTy = llvm::FunctionType::get( + llvm::Type::VoidTy, + VoidArgs, + false); + // i32, function type pair + CtorFields.push_back(llvm::PointerType::getUnqual(CtorFuncTy)); + llvm::StructType* CtorStructTy = llvm::StructType::get(CtorFields, false); + // Array of fields + llvm::ArrayType* GlobalCtorsTy = llvm::ArrayType::get(CtorStructTy, + GlobalCtors.size()); + + const std::string GlobalCtorsVar = std::string("llvm.global_ctors"); + // Define the global variable + llvm::GlobalVariable *GlobalCtorsVal = new llvm::GlobalVariable( + GlobalCtorsTy, + false, + llvm::GlobalValue::AppendingLinkage, + (llvm::Constant*)0, + GlobalCtorsVar, + &TheModule); + + // Populate the array + std::vector<llvm::Constant*> CtorValues; + llvm::Constant *MagicNumber = 
llvm::ConstantInt::get(llvm::IntegerType::Int32Ty, + 65535, + false); + for (std::vector<llvm::Constant*>::iterator I = GlobalCtors.begin(), + E = GlobalCtors.end(); I != E; ++I) { + std::vector<llvm::Constant*> StructValues; + StructValues.push_back(MagicNumber); + StructValues.push_back(*I); + + llvm::Constant* CtorEntry = llvm::ConstantStruct::get(CtorStructTy, StructValues); + CtorValues.push_back(CtorEntry); + } + llvm::Constant* CtorArray = llvm::ConstantArray::get(GlobalCtorsTy, CtorValues); + GlobalCtorsVal->setInitializer(CtorArray); + +} + +/// ReplaceMapValuesWith - This is a really slow and bad function that +/// searches for any entries in GlobalDeclMap that point to OldVal, changing +/// them to point to NewVal. This is badbadbad, FIXME! +void CodeGenModule::ReplaceMapValuesWith(llvm::Constant *OldVal, + llvm::Constant *NewVal) { + for (llvm::DenseMap<const Decl*, llvm::Constant*>::iterator + I = GlobalDeclMap.begin(), E = GlobalDeclMap.end(); I != E; ++I) + if (I->second == OldVal) I->second = NewVal; +} + + +llvm::Constant *CodeGenModule::GetAddrOfFunctionDecl(const FunctionDecl *D, + bool isDefinition) { + // See if it is already in the map. If so, just return it. + llvm::Constant *&Entry = GlobalDeclMap[D]; + if (Entry) return Entry; + + const llvm::Type *Ty = getTypes().ConvertType(D->getType()); + + // Check to see if the function already exists. + llvm::Function *F = getModule().getFunction(D->getName()); + const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); + + // If it doesn't already exist, just create and return an entry. + if (F == 0) { + // FIXME: param attributes for sext/zext etc. + F = new llvm::Function(FTy, llvm::Function::ExternalLinkage, D->getName(), + &getModule()); + + // Set the appropriate calling convention for the Function. + if (D->getAttr<FastCallAttr>()) + F->setCallingConv(llvm::CallingConv::Fast); + return Entry = F; + } + + // If the pointer type matches, just return it. 
+ llvm::Type *PFTy = llvm::PointerType::getUnqual(Ty); + if (PFTy == F->getType()) return Entry = F; + + // If this isn't a definition, just return it casted to the right type. + if (!isDefinition) + return Entry = llvm::ConstantExpr::getBitCast(F, PFTy); + + // Otherwise, we have a definition after a prototype with the wrong type. + // F is the Function* for the one with the wrong type, we must make a new + // Function* and update everything that used F (a declaration) with the new + // Function* (which will be a definition). + // + // This happens if there is a prototype for a function (e.g. "int f()") and + // then a definition of a different type (e.g. "int f(int x)"). Start by + // making a new function of the correct type, RAUW, then steal the name. + llvm::Function *NewFn = new llvm::Function(FTy, + llvm::Function::ExternalLinkage, + "", &getModule()); + NewFn->takeName(F); + + // Replace uses of F with the Function we will endow with a body. + llvm::Constant *NewPtrForOldDecl = + llvm::ConstantExpr::getBitCast(NewFn, F->getType()); + F->replaceAllUsesWith(NewPtrForOldDecl); + + // FIXME: Update the globaldeclmap for the previous decl of this name. We + // really want a way to walk all of these, but we don't have it yet. This + // is incredibly slow! + ReplaceMapValuesWith(F, NewPtrForOldDecl); + + // Ok, delete the old function now, which is dead. + assert(F->isDeclaration() && "Shouldn't replace non-declaration"); + F->eraseFromParent(); + + // Return the new function which has the right type. + return Entry = NewFn; +} + +static bool IsZeroElementArray(const llvm::Type *Ty) { + if (const llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(Ty)) + return ATy->getNumElements() == 0; + return false; +} + +llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, + bool isDefinition) { + assert(D->hasGlobalStorage() && "Not a global variable"); + + // See if it is already in the map. 
+ llvm::Constant *&Entry = GlobalDeclMap[D]; + if (Entry) return Entry; + + QualType ASTTy = D->getType(); + const llvm::Type *Ty = getTypes().ConvertTypeForMem(ASTTy); + + // Check to see if the global already exists. + llvm::GlobalVariable *GV = getModule().getGlobalVariable(D->getName(), true); + + // If it doesn't already exist, just create and return an entry. + if (GV == 0) { + return Entry = new llvm::GlobalVariable(Ty, false, + llvm::GlobalValue::ExternalLinkage, + 0, D->getName(), &getModule(), 0, + ASTTy.getAddressSpace()); + } + + // If the pointer type matches, just return it. + llvm::Type *PTy = llvm::PointerType::getUnqual(Ty); + if (PTy == GV->getType()) return Entry = GV; + + // If this isn't a definition, just return it casted to the right type. + if (!isDefinition) + return Entry = llvm::ConstantExpr::getBitCast(GV, PTy); + + + // Otherwise, we have a definition after a prototype with the wrong type. + // GV is the GlobalVariable* for the one with the wrong type, we must make a + /// new GlobalVariable* and update everything that used GV (a declaration) + // with the new GlobalVariable* (which will be a definition). + // + // This happens if there is a prototype for a global (e.g. "extern int x[];") + // and then a definition of a different type (e.g. "int x[10];"). Start by + // making a new global of the correct type, RAUW, then steal the name. + llvm::GlobalVariable *NewGV = + new llvm::GlobalVariable(Ty, false, llvm::GlobalValue::ExternalLinkage, + 0, D->getName(), &getModule(), 0, + ASTTy.getAddressSpace()); + NewGV->takeName(GV); + + // Replace uses of GV with the globalvalue we will endow with a body. + llvm::Constant *NewPtrForOldDecl = + llvm::ConstantExpr::getBitCast(NewGV, GV->getType()); + GV->replaceAllUsesWith(NewPtrForOldDecl); + + // FIXME: Update the globaldeclmap for the previous decl of this name. We + // really want a way to walk all of these, but we don't have it yet. This + // is incredibly slow! 
+ ReplaceMapValuesWith(GV, NewPtrForOldDecl); + + // Verify that GV was a declaration or something like x[] which turns into + // [0 x type]. + assert((GV->isDeclaration() || + IsZeroElementArray(GV->getType()->getElementType())) && + "Shouldn't replace non-declaration"); + + // Ok, delete the old global now, which is dead. + GV->eraseFromParent(); + + // Return the new global which has the right type. + return Entry = NewGV; +} + + +void CodeGenModule::EmitFunction(const FunctionDecl *FD) { + // If this is not a prototype, emit the body. + if (FD->getBody()) + CodeGenFunction(*this).GenerateCode(FD); +} + +llvm::Constant *CodeGenModule::EmitGlobalInit(const Expr *Expr) { + return EmitConstantExpr(Expr); +} + +void CodeGenModule::EmitGlobalVar(const FileVarDecl *D) { + // If this is just a forward declaration of the variable, don't emit it now, + // allow it to be emitted lazily on its first use. + if (D->getStorageClass() == VarDecl::Extern && D->getInit() == 0) + return; + + // Get the global, forcing it to be a direct reference. + llvm::GlobalVariable *GV = + cast<llvm::GlobalVariable>(GetAddrOfGlobalVar(D, true)); + + // Convert the initializer, or use zero if appropriate. + llvm::Constant *Init = 0; + if (D->getInit() == 0) { + Init = llvm::Constant::getNullValue(GV->getType()->getElementType()); + } else if (D->getType()->isIntegerType()) { + llvm::APSInt Value(static_cast<uint32_t>( + getContext().getTypeSize(D->getInit()->getType()))); + if (D->getInit()->isIntegerConstantExpr(Value, Context)) + Init = llvm::ConstantInt::get(Value); + } + + if (!Init) + Init = EmitGlobalInit(D->getInit()); + + assert(GV->getType()->getElementType() == Init->getType() && + "Initializer codegen type mismatch!"); + GV->setInitializer(Init); + + if (const VisibilityAttr *attr = D->getAttr<VisibilityAttr>()) + GV->setVisibility(attr->getVisibility()); + // FIXME: else handle -fvisibility + + // Set the llvm linkage type as appropriate. 
+ if (D->getAttr<DLLImportAttr>()) + GV->setLinkage(llvm::Function::DLLImportLinkage); + else if (D->getAttr<DLLExportAttr>()) + GV->setLinkage(llvm::Function::DLLExportLinkage); + else if (D->getAttr<WeakAttr>()) { + GV->setLinkage(llvm::GlobalVariable::WeakLinkage); + + } else { + // FIXME: This isn't right. This should handle common linkage and other + // stuff. + switch (D->getStorageClass()) { + case VarDecl::Auto: + case VarDecl::Register: + assert(0 && "Can't have auto or register globals"); + case VarDecl::None: + if (!D->getInit()) + GV->setLinkage(llvm::GlobalVariable::WeakLinkage); + break; + case VarDecl::Extern: + case VarDecl::PrivateExtern: + // todo: common + break; + case VarDecl::Static: + GV->setLinkage(llvm::GlobalVariable::InternalLinkage); + break; + } + } +} + +/// EmitGlobalVarDeclarator - Emit all the global vars attached to the specified +/// declarator chain. +void CodeGenModule::EmitGlobalVarDeclarator(const FileVarDecl *D) { + for (; D; D = cast_or_null<FileVarDecl>(D->getNextDeclarator())) + EmitGlobalVar(D); +} + +void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { + // Make sure that this type is translated. + Types.UpdateCompletedType(TD); +} + + +/// getBuiltinLibFunction +llvm::Function *CodeGenModule::getBuiltinLibFunction(unsigned BuiltinID) { + if (BuiltinID > BuiltinFunctions.size()) + BuiltinFunctions.resize(BuiltinID); + + // Cache looked up functions. Since builtin id #0 is invalid we don't reserve + // a slot for it. + assert(BuiltinID && "Invalid Builtin ID"); + llvm::Function *&FunctionSlot = BuiltinFunctions[BuiltinID-1]; + if (FunctionSlot) + return FunctionSlot; + + assert(Context.BuiltinInfo.isLibFunction(BuiltinID) && "isn't a lib fn"); + + // Get the name, skip over the __builtin_ prefix. + const char *Name = Context.BuiltinInfo.GetName(BuiltinID)+10; + + // Get the type for the builtin. 
+ QualType Type = Context.BuiltinInfo.GetBuiltinType(BuiltinID, Context); + const llvm::FunctionType *Ty = + cast<llvm::FunctionType>(getTypes().ConvertType(Type)); + + // FIXME: This has a serious problem with code like this: + // void abs() {} + // ... __builtin_abs(x); + // The two versions of abs will collide. The fix is for the builtin to win, + // and for the existing one to be turned into a constantexpr cast of the + // builtin. In the case where the existing one is a static function, it + // should just be renamed. + if (llvm::Function *Existing = getModule().getFunction(Name)) { + if (Existing->getFunctionType() == Ty && Existing->hasExternalLinkage()) + return FunctionSlot = Existing; + assert(Existing == 0 && "FIXME: Name collision"); + } + + // FIXME: param attributes for sext/zext etc. + return FunctionSlot = new llvm::Function(Ty, llvm::Function::ExternalLinkage, + Name, &getModule()); +} + +llvm::Function *CodeGenModule::getIntrinsic(unsigned IID,const llvm::Type **Tys, + unsigned NumTys) { + return llvm::Intrinsic::getDeclaration(&getModule(), + (llvm::Intrinsic::ID)IID, Tys, NumTys); +} + +llvm::Function *CodeGenModule::getMemCpyFn() { + if (MemCpyFn) return MemCpyFn; + llvm::Intrinsic::ID IID; + switch (Context.Target.getPointerWidth(0)) { + default: assert(0 && "Unknown ptr width"); + case 32: IID = llvm::Intrinsic::memcpy_i32; break; + case 64: IID = llvm::Intrinsic::memcpy_i64; break; + } + return MemCpyFn = getIntrinsic(IID); +} + +llvm::Function *CodeGenModule::getMemSetFn() { + if (MemSetFn) return MemSetFn; + llvm::Intrinsic::ID IID; + switch (Context.Target.getPointerWidth(0)) { + default: assert(0 && "Unknown ptr width"); + case 32: IID = llvm::Intrinsic::memset_i32; break; + case 64: IID = llvm::Intrinsic::memset_i64; break; + } + return MemSetFn = getIntrinsic(IID); +} + +llvm::Constant *CodeGenModule:: +GetAddrOfConstantCFString(const std::string &str) { + llvm::StringMapEntry<llvm::Constant *> &Entry = + 
CFConstantStringMap.GetOrCreateValue(&str[0], &str[str.length()]); + + if (Entry.getValue()) + return Entry.getValue(); + + std::vector<llvm::Constant*> Fields; + + if (!CFConstantStringClassRef) { + const llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); + Ty = llvm::ArrayType::get(Ty, 0); + + CFConstantStringClassRef = + new llvm::GlobalVariable(Ty, false, + llvm::GlobalVariable::ExternalLinkage, 0, + "__CFConstantStringClassReference", + &getModule()); + } + + // Class pointer. + llvm::Constant *Zero = llvm::Constant::getNullValue(llvm::Type::Int32Ty); + llvm::Constant *Zeros[] = { Zero, Zero }; + llvm::Constant *C = + llvm::ConstantExpr::getGetElementPtr(CFConstantStringClassRef, Zeros, 2); + Fields.push_back(C); + + // Flags. + const llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); + Fields.push_back(llvm::ConstantInt::get(Ty, 1992)); + + // String pointer. + C = llvm::ConstantArray::get(str); + C = new llvm::GlobalVariable(C->getType(), true, + llvm::GlobalValue::InternalLinkage, + C, ".str", &getModule()); + + C = llvm::ConstantExpr::getGetElementPtr(C, Zeros, 2); + Fields.push_back(C); + + // String length. + Ty = getTypes().ConvertType(getContext().LongTy); + Fields.push_back(llvm::ConstantInt::get(Ty, str.length())); + + // The struct. + Ty = getTypes().ConvertType(getContext().getCFConstantStringType()); + C = llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Fields); + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(C->getType(), true, + llvm::GlobalVariable::InternalLinkage, + C, "", &getModule()); + GV->setSection("__DATA,__cfstring"); + Entry.setValue(GV); + return GV; +} + +/// GenerateWritableString -- Creates storage for a string literal. 
+static llvm::Constant *GenerateStringLiteral(const std::string &str, + bool constant, + CodeGenModule &CGM) { + // Create Constant for this string literal + llvm::Constant *C=llvm::ConstantArray::get(str); + + // Create a global variable for this string + C = new llvm::GlobalVariable(C->getType(), constant, + llvm::GlobalValue::InternalLinkage, + C, ".str", &CGM.getModule()); + return C; +} + +/// CodeGenModule::GetAddrOfConstantString -- returns a pointer to the character +/// array containing the literal. The result is pointer to array type. +llvm::Constant *CodeGenModule::GetAddrOfConstantString(const std::string &str) { + // Don't share any string literals if writable-strings is turned on. + if (Features.WritableStrings) + return GenerateStringLiteral(str, false, *this); + + llvm::StringMapEntry<llvm::Constant *> &Entry = + ConstantStringMap.GetOrCreateValue(&str[0], &str[str.length()]); + + if (Entry.getValue()) + return Entry.getValue(); + + // Create a global variable for this. + llvm::Constant *C = GenerateStringLiteral(str, true, *this); + Entry.setValue(C); + return C; +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h new file mode 100644 index 00000000000..cbea09fd3ec --- /dev/null +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -0,0 +1,129 @@ +//===--- CodeGenModule.h - Per-Module state for LLVM CodeGen --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-translation-unit state used for llvm translation. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CODEGENMODULE_H +#define CLANG_CODEGEN_CODEGENMODULE_H + +#include "CodeGenTypes.h" +#include "CGObjCRuntime.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" + +namespace llvm { + class Module; + class Constant; + class Function; + class GlobalVariable; + class TargetData; +} + +namespace clang { + class ASTContext; + class FunctionDecl; + class Decl; + class Expr; + class Stmt; + class ValueDecl; + class VarDecl; + class TypeDecl; + class FileVarDecl; + struct LangOptions; + class Diagnostic; + +namespace CodeGen { + + class CodeGenFunction; + +/// CodeGenModule - This class organizes the cross-module state that is used +/// while generating LLVM code. +class CodeGenModule { + ASTContext &Context; + const LangOptions &Features; + llvm::Module &TheModule; + const llvm::TargetData &TheTargetData; + Diagnostic &Diags; + CodeGenTypes Types; + CGObjCRuntime *Runtime; + + llvm::Function *MemCpyFn; + llvm::Function *MemSetFn; + llvm::DenseMap<const Decl*, llvm::Constant*> GlobalDeclMap; + std::vector<llvm::Constant*> GlobalCtors; + + llvm::StringMap<llvm::Constant*> CFConstantStringMap; + llvm::StringMap<llvm::Constant*> ConstantStringMap; + llvm::Constant *CFConstantStringClassRef; + + std::vector<llvm::Function *> BuiltinFunctions; +public: + CodeGenModule(ASTContext &C, const LangOptions &Features, llvm::Module &M, + const llvm::TargetData &TD, Diagnostic &Diags); + ~CodeGenModule(); + + CGObjCRuntime *getObjCRuntime() { return Runtime; } + ASTContext &getContext() const { return Context; } + const LangOptions &getLangOptions() const { return Features; } + llvm::Module &getModule() const { return TheModule; } + CodeGenTypes &getTypes() { return Types; } + Diagnostic &getDiags() const { return Diags; } + const llvm::TargetData &getTargetData() const { return TheTargetData; } + + llvm::Constant *GetAddrOfFunctionDecl(const FunctionDecl *D, + 
bool isDefinition); + llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D, bool isDefinition); + + + /// getBuiltinLibFunction - Given a builtin id for a function like + /// "__builtin_fabsf", return a Function* for "fabsf". + /// + llvm::Function *getBuiltinLibFunction(unsigned BuiltinID); + llvm::Constant *GetAddrOfConstantCFString(const std::string& str); + + /// GetAddrOfConstantString -- returns a pointer to the character + /// array containing the literal. The result is pointer to array type. + llvm::Constant *GetAddrOfConstantString(const std::string& str); + llvm::Function *getMemCpyFn(); + llvm::Function *getMemSetFn(); + llvm::Function *getIntrinsic(unsigned IID, const llvm::Type **Tys = 0, + unsigned NumTys = 0); + + void AddGlobalCtor(llvm::Function * Ctor); + void EmitGlobalCtors(void); + + void EmitFunction(const FunctionDecl *FD); + void EmitGlobalVar(const FileVarDecl *D); + void EmitGlobalVarDeclarator(const FileVarDecl *D); + void UpdateCompletedType(const TagDecl *D); + llvm::Constant *EmitGlobalInit(const Expr *E); + llvm::Constant *EmitConstantExpr(const Expr *E, CodeGenFunction *CGF = 0); + + /// WarnUnsupported - Print out a warning that codegen doesn't support the + /// specified stmt yet. + + void WarnUnsupported(const Stmt *S, const char *Type); + + /// WarnUnsupported - Print out a warning that codegen doesn't support the + /// specified decl yet. + void WarnUnsupported(const Decl *D, const char *Type); + +private: + /// ReplaceMapValuesWith - This is a really slow and bad function that + /// searches for any entries in GlobalDeclMap that point to OldVal, changing + /// them to point to NewVal. This is badbadbad, FIXME! 
+ void ReplaceMapValuesWith(llvm::Constant *OldVal, llvm::Constant *NewVal); + +}; +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp new file mode 100644 index 00000000000..9a669e87056 --- /dev/null +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -0,0 +1,580 @@ +//===--- CodeGenTypes.cpp - Type translation for LLVM CodeGen -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the code that handles AST -> LLVM type lowering. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenTypes.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/AST/AST.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetData.h" + +using namespace clang; +using namespace CodeGen; + +namespace { + /// RecordOrganizer - This helper class, used by CGRecordLayout, layouts + /// structs and unions. It manages transient information used during layout. + /// FIXME : Handle field aligments. Handle packed structs. + class RecordOrganizer { + public: + explicit RecordOrganizer(CodeGenTypes &Types) : + CGT(Types), STy(NULL), llvmFieldNo(0), Cursor(0), + llvmSize(0) {} + + /// addField - Add new field. + void addField(const FieldDecl *FD); + + /// addLLVMField - Add llvm struct field that corresponds to llvm type Ty. + /// Increment field count. + void addLLVMField(const llvm::Type *Ty, bool isPaddingField = false); + + /// addPaddingFields - Current cursor is not suitable place to add next + /// field. Add required padding fields. + void addPaddingFields(unsigned WaterMark); + + /// layoutStructFields - Do the actual work and lay out all fields. Create + /// corresponding llvm struct type. 
This should be invoked only after + /// all fields are added. + void layoutStructFields(const ASTRecordLayout &RL); + + /// layoutUnionFields - Do the actual work and lay out all fields. Create + /// corresponding llvm struct type. This should be invoked only after + /// all fields are added. + void layoutUnionFields(); + + /// getLLVMType - Return associated llvm struct type. This may be NULL + /// if fields are not laid out. + llvm::Type *getLLVMType() const { + return STy; + } + + /// placeBitField - Find a place for FD, which is a bit-field. + void placeBitField(const FieldDecl *FD); + + llvm::SmallSet<unsigned, 8> &getPaddingFields() { + return PaddingFields; + } + + private: + CodeGenTypes &CGT; + llvm::Type *STy; + unsigned llvmFieldNo; + uint64_t Cursor; + uint64_t llvmSize; + llvm::SmallVector<const FieldDecl *, 8> FieldDecls; + std::vector<const llvm::Type*> LLVMFields; + llvm::SmallSet<unsigned, 8> PaddingFields; + }; +} + +CodeGenTypes::CodeGenTypes(ASTContext &Ctx, llvm::Module& M, + const llvm::TargetData &TD) + : Context(Ctx), Target(Ctx.Target), TheModule(M), TheTargetData(TD) { +} + +CodeGenTypes::~CodeGenTypes() { + for(llvm::DenseMap<const TagDecl *, CGRecordLayout *>::iterator + I = CGRecordLayouts.begin(), E = CGRecordLayouts.end(); + I != E; ++I) + delete I->second; + CGRecordLayouts.clear(); +} + +/// ConvertType - Convert the specified type to its LLVM form. +const llvm::Type *CodeGenTypes::ConvertType(QualType T) { + // See if type is already cached. + llvm::DenseMap<Type *, llvm::PATypeHolder>::iterator + I = TypeCache.find(T.getCanonicalType().getTypePtr()); + // If type is found in map and this is not a definition for a opaque + // place holder type then use it. Otherwise, convert type T. 
+ if (I != TypeCache.end()) + return I->second.get(); + + const llvm::Type *ResultType = ConvertNewType(T); + TypeCache.insert(std::make_pair(T.getCanonicalType().getTypePtr(), + llvm::PATypeHolder(ResultType))); + return ResultType; +} + +/// ConvertTypeForMem - Convert type T into a llvm::Type. This differs from +/// ConvertType in that it is used to convert to the memory representation for +/// a type. For example, the scalar representation for _Bool is i1, but the +/// memory representation is usually i8 or i32, depending on the target. +const llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { + const llvm::Type *R = ConvertType(T); + + // If this is a non-bool type, don't map it. + if (R != llvm::Type::Int1Ty) + return R; + + // Otherwise, return an integer of the target-specified size. + return llvm::IntegerType::get((unsigned)Context.getTypeSize(T)); + +} + +/// UpdateCompletedType - When we find the full definition for a TagDecl, +/// replace the 'opaque' type we previously made for it if applicable. +void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) { + llvm::DenseMap<const TagDecl*, llvm::PATypeHolder>::iterator TDTI = + TagDeclTypes.find(TD); + if (TDTI == TagDeclTypes.end()) return; + + // Remember the opaque LLVM type for this tagdecl. + llvm::PATypeHolder OpaqueHolder = TDTI->second; + assert(isa<llvm::OpaqueType>(OpaqueHolder.get()) && + "Updating compilation of an already non-opaque type?"); + + // Remove it from TagDeclTypes so that it will be regenerated. + TagDeclTypes.erase(TDTI); + + // Generate the new type. + const llvm::Type *NT = ConvertTagDeclType(TD); + + // Refine the old opaque type to its new definition. + cast<llvm::OpaqueType>(OpaqueHolder.get())->refineAbstractTypeTo(NT); +} + + + +const llvm::Type *CodeGenTypes::ConvertNewType(QualType T) { + const clang::Type &Ty = *T.getCanonicalType(); + + switch (Ty.getTypeClass()) { + case Type::TypeName: // typedef isn't canonical. 
+ case Type::TypeOfExp: // typeof isn't canonical. + case Type::TypeOfTyp: // typeof isn't canonical. + assert(0 && "Non-canonical type, shouldn't happen"); + case Type::Builtin: { + switch (cast<BuiltinType>(Ty).getKind()) { + case BuiltinType::Void: + // LLVM void type can only be used as the result of a function call. Just + // map to the same as char. + return llvm::IntegerType::get(8); + + case BuiltinType::Bool: + // Note that we always return bool as i1 for use as a scalar type. + return llvm::Type::Int1Ty; + + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::Int: + case BuiltinType::UInt: + case BuiltinType::Long: + case BuiltinType::ULong: + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + return llvm::IntegerType::get( + static_cast<unsigned>(Context.getTypeSize(T))); + + case BuiltinType::Float: return llvm::Type::FloatTy; + case BuiltinType::Double: return llvm::Type::DoubleTy; + case BuiltinType::LongDouble: + // FIXME: mapping long double onto double. 
+ return llvm::Type::DoubleTy; + } + break; + } + case Type::Complex: { + std::vector<const llvm::Type*> Elts; + Elts.push_back(ConvertType(cast<ComplexType>(Ty).getElementType())); + Elts.push_back(Elts[0]); + return llvm::StructType::get(Elts); + } + case Type::Pointer: { + const PointerType &P = cast<PointerType>(Ty); + QualType ETy = P.getPointeeType(); + return llvm::PointerType::get(ConvertType(ETy), ETy.getAddressSpace()); + } + case Type::Reference: { + const ReferenceType &R = cast<ReferenceType>(Ty); + return llvm::PointerType::getUnqual(ConvertType(R.getReferenceeType())); + } + + case Type::VariableArray: { + const VariableArrayType &A = cast<VariableArrayType>(Ty); + assert(A.getIndexTypeQualifier() == 0 && + "FIXME: We only handle trivial array types so far!"); + // VLAs resolve to the innermost element type; this matches + // the return of alloca, and there isn't any obviously better choice. + return ConvertType(A.getElementType()); + } + case Type::IncompleteArray: { + const IncompleteArrayType &A = cast<IncompleteArrayType>(Ty); + assert(A.getIndexTypeQualifier() == 0 && + "FIXME: We only handle trivial array types so far!"); + // int X[] -> [0 x int] + return llvm::ArrayType::get(ConvertType(A.getElementType()), 0); + } + case Type::ConstantArray: { + const ConstantArrayType &A = cast<ConstantArrayType>(Ty); + const llvm::Type *EltTy = ConvertType(A.getElementType()); + return llvm::ArrayType::get(EltTy, A.getSize().getZExtValue()); + } + case Type::OCUVector: + case Type::Vector: { + const VectorType &VT = cast<VectorType>(Ty); + return llvm::VectorType::get(ConvertType(VT.getElementType()), + VT.getNumElements()); + } + case Type::FunctionNoProto: + case Type::FunctionProto: { + const FunctionType &FP = cast<FunctionType>(Ty); + const llvm::Type *ResultType; + + if (FP.getResultType()->isVoidType()) + ResultType = llvm::Type::VoidTy; // Result of function uses llvm void. 
+ else + ResultType = ConvertType(FP.getResultType()); + + // FIXME: Convert argument types. + bool isVarArg; + std::vector<const llvm::Type*> ArgTys; + + // Struct return passes the struct byref. + if (!ResultType->isFirstClassType() && ResultType != llvm::Type::VoidTy) { + ArgTys.push_back(llvm::PointerType::get(ResultType, + FP.getResultType().getAddressSpace())); + ResultType = llvm::Type::VoidTy; + } + + if (const FunctionTypeProto *FTP = dyn_cast<FunctionTypeProto>(&FP)) { + DecodeArgumentTypes(*FTP, ArgTys); + isVarArg = FTP->isVariadic(); + } else { + isVarArg = true; + } + + return llvm::FunctionType::get(ResultType, ArgTys, isVarArg); + } + + case Type::ASQual: + return ConvertType(QualType(cast<ASQualType>(Ty).getBaseType(), 0)); + + case Type::ObjCInterface: + assert(0 && "FIXME: add missing functionality here"); + break; + + case Type::ObjCQualifiedInterface: + assert(0 && "FIXME: add missing functionality here"); + break; + + case Type::ObjCQualifiedId: + assert(0 && "FIXME: add missing functionality here"); + break; + + case Type::Tagged: { + const TagDecl *TD = cast<TagType>(Ty).getDecl(); + const llvm::Type *Res = ConvertTagDeclType(TD); + + std::string TypeName(TD->getKindName()); + TypeName += '.'; + + // Name the codegen type after the typedef name + // if there is no tag type name available + if (TD->getIdentifier()) + TypeName += TD->getName(); + else if (const TypedefType *TdT = dyn_cast<TypedefType>(T)) + TypeName += TdT->getDecl()->getName(); + else + TypeName += "anon"; + + TheModule.addTypeName(TypeName, Res); + return Res; + } + } + + // FIXME: implement. 
+ return llvm::OpaqueType::get(); +} + +void CodeGenTypes::DecodeArgumentTypes(const FunctionTypeProto &FTP, + std::vector<const llvm::Type*> &ArgTys) { + for (unsigned i = 0, e = FTP.getNumArgs(); i != e; ++i) { + const llvm::Type *Ty = ConvertType(FTP.getArgType(i)); + if (Ty->isFirstClassType()) + ArgTys.push_back(Ty); + else + // byval arguments are always on the stack, which is addr space #0. + ArgTys.push_back(llvm::PointerType::getUnqual(Ty)); + } +} + +/// ConvertTagDeclType - Lay out a tagged decl type like struct or union or +/// enum. +const llvm::Type *CodeGenTypes::ConvertTagDeclType(const TagDecl *TD) { + llvm::DenseMap<const TagDecl*, llvm::PATypeHolder>::iterator TDTI = + TagDeclTypes.find(TD); + + // If we've already compiled this tag type, use the previous definition. + if (TDTI != TagDeclTypes.end()) + return TDTI->second; + + // If this is still a forward definition, just define an opaque type to use + // for this tagged decl. + if (!TD->isDefinition()) { + llvm::Type *ResultType = llvm::OpaqueType::get(); + TagDeclTypes.insert(std::make_pair(TD, ResultType)); + return ResultType; + } + + // Okay, this is a definition of a type. Compile the implementation now. + + if (TD->getKind() == Decl::Enum) { + // Don't bother storing enums in TagDeclTypes. + return ConvertType(cast<EnumDecl>(TD)->getIntegerType()); + } + + // This decl could well be recursive. In this case, insert an opaque + // definition of this type, which the recursive uses will get. We will then + // refine this opaque version later. + + // Create new OpaqueType now for later use in case this is a recursive + // type. This will later be refined to the actual type. + llvm::PATypeHolder ResultHolder = llvm::OpaqueType::get(); + TagDeclTypes.insert(std::make_pair(TD, ResultHolder)); + + const llvm::Type *ResultType; + const RecordDecl *RD = cast<const RecordDecl>(TD); + if (TD->getKind() == Decl::Struct || TD->getKind() == Decl::Class) { + // Layout fields. 
+ RecordOrganizer RO(*this); + for (unsigned i = 0, e = RD->getNumMembers(); i != e; ++i) + RO.addField(RD->getMember(i)); + + RO.layoutStructFields(Context.getASTRecordLayout(RD)); + + // Get llvm::StructType. + CGRecordLayouts[TD] = new CGRecordLayout(RO.getLLVMType(), + RO.getPaddingFields()); + ResultType = RO.getLLVMType(); + + } else if (TD->getKind() == Decl::Union) { + // Just use the largest element of the union, breaking ties with the + // highest aligned member. + if (RD->getNumMembers() != 0) { + RecordOrganizer RO(*this); + for (unsigned i = 0, e = RD->getNumMembers(); i != e; ++i) + RO.addField(RD->getMember(i)); + + RO.layoutUnionFields(); + + // Get llvm::StructType. + CGRecordLayouts[TD] = new CGRecordLayout(RO.getLLVMType(), + RO.getPaddingFields()); + ResultType = RO.getLLVMType(); + } else { + ResultType = llvm::StructType::get(std::vector<const llvm::Type*>()); + } + } else { + assert(0 && "FIXME: Unknown tag decl kind!"); + } + + // Refine our Opaque type to ResultType. This can invalidate ResultType, so + // make sure to read the result out of the holder. + cast<llvm::OpaqueType>(ResultHolder.get()) + ->refineAbstractTypeTo(ResultType); + + return ResultHolder.get(); +} + +/// getLLVMFieldNo - Return llvm::StructType element number +/// that corresponds to the field FD. +unsigned CodeGenTypes::getLLVMFieldNo(const FieldDecl *FD) { + llvm::DenseMap<const FieldDecl *, unsigned>::iterator + I = FieldInfo.find(FD); + assert (I != FieldInfo.end() && "Unable to find field info"); + return I->second; +} + +/// addFieldInfo - Assign field number to field FD. +void CodeGenTypes::addFieldInfo(const FieldDecl *FD, unsigned No) { + FieldInfo[FD] = No; +} + +/// getBitFieldInfo - Return the BitFieldInfo that corresponds to the field FD. 
+CodeGenTypes::BitFieldInfo CodeGenTypes::getBitFieldInfo(const FieldDecl *FD) { + llvm::DenseMap<const FieldDecl *, BitFieldInfo>::iterator + I = BitFields.find(FD); + assert (I != BitFields.end() && "Unable to find bitfield info"); + return I->second; +} + +/// addBitFieldInfo - Assign a start bit and a size to field FD. +void CodeGenTypes::addBitFieldInfo(const FieldDecl *FD, unsigned Begin, + unsigned Size) { + BitFields.insert(std::make_pair(FD, BitFieldInfo(Begin, Size))); +} + +/// getCGRecordLayout - Return record layout info for the given llvm::Type. +const CGRecordLayout * +CodeGenTypes::getCGRecordLayout(const TagDecl *TD) const { + llvm::DenseMap<const TagDecl*, CGRecordLayout *>::iterator I + = CGRecordLayouts.find(TD); + assert (I != CGRecordLayouts.end() + && "Unable to find record layout information for type"); + return I->second; +} + +/// addField - Add new field. +void RecordOrganizer::addField(const FieldDecl *FD) { + assert (!STy && "Record fields are already laid out"); + FieldDecls.push_back(FD); +} + +/// layoutStructFields - Do the actual work and lay out all fields. Create +/// corresponding llvm struct type. This should be invoked only after +/// all fields are added. +/// FIXME : At the moment assume +/// - one to one mapping between AST FieldDecls and +/// llvm::StructType elements. 
+/// - Ignore bit fields +/// - Ignore field aligments +/// - Ignore packed structs +void RecordOrganizer::layoutStructFields(const ASTRecordLayout &RL) { + // FIXME : Use SmallVector + llvmSize = 0; + llvmFieldNo = 0; + Cursor = 0; + LLVMFields.clear(); + + for (llvm::SmallVector<const FieldDecl *, 8>::iterator I = FieldDecls.begin(), + E = FieldDecls.end(); I != E; ++I) { + const FieldDecl *FD = *I; + + if (FD->isBitField()) + placeBitField(FD); + else { + const llvm::Type *Ty = CGT.ConvertType(FD->getType()); + addLLVMField(Ty); + CGT.addFieldInfo(FD, llvmFieldNo - 1); + Cursor = llvmSize; + } + } + + unsigned StructAlign = RL.getAlignment(); + if (llvmSize % StructAlign) { + unsigned StructPadding = StructAlign - (llvmSize % StructAlign); + addPaddingFields(llvmSize + StructPadding); + } + + STy = llvm::StructType::get(LLVMFields); +} + +/// addPaddingFields - Current cursor is not suitable place to add next field. +/// Add required padding fields. +void RecordOrganizer::addPaddingFields(unsigned WaterMark) { + assert(WaterMark >= llvmSize && "Invalid padding Field"); + unsigned RequiredBits = WaterMark - llvmSize; + unsigned RequiredBytes = (RequiredBits + 7) / 8; + for (unsigned i = 0; i != RequiredBytes; ++i) + addLLVMField(llvm::Type::Int8Ty, true); +} + +/// addLLVMField - Add llvm struct field that corresponds to llvm type Ty. +/// Increment field count. +void RecordOrganizer::addLLVMField(const llvm::Type *Ty, bool isPaddingField) { + + unsigned AlignmentInBits = CGT.getTargetData().getABITypeAlignment(Ty) * 8; + if (llvmSize % AlignmentInBits) { + // At the moment, insert padding fields even if target specific llvm + // type alignment enforces implict padding fields for FD. Later on, + // optimize llvm fields by removing implicit padding fields and + // combining consequetive padding fields. 
+ unsigned Padding = AlignmentInBits - (llvmSize % AlignmentInBits); + addPaddingFields(llvmSize + Padding); + } + + unsigned TySize = CGT.getTargetData().getABITypeSizeInBits(Ty); + llvmSize += TySize; + if (isPaddingField) + PaddingFields.insert(llvmFieldNo); + LLVMFields.push_back(Ty); + ++llvmFieldNo; +} + +/// layoutUnionFields - Do the actual work and lay out all fields. Create +/// corresponding llvm struct type. This should be invoked only after +/// all fields are added. +void RecordOrganizer::layoutUnionFields() { + + unsigned PrimaryEltNo = 0; + std::pair<uint64_t, unsigned> PrimaryElt = + CGT.getContext().getTypeInfo(FieldDecls[0]->getType()); + CGT.addFieldInfo(FieldDecls[0], 0); + + unsigned Size = FieldDecls.size(); + for(unsigned i = 1; i != Size; ++i) { + const FieldDecl *FD = FieldDecls[i]; + assert (!FD->isBitField() && "Bit fields are not yet supported"); + std::pair<uint64_t, unsigned> EltInfo = + CGT.getContext().getTypeInfo(FD->getType()); + + // Use largest element, breaking ties with the hightest aligned member. + if (EltInfo.first > PrimaryElt.first || + (EltInfo.first == PrimaryElt.first && + EltInfo.second > PrimaryElt.second)) { + PrimaryElt = EltInfo; + PrimaryEltNo = i; + } + + // In union, each field gets first slot. + CGT.addFieldInfo(FD, 0); + } + + std::vector<const llvm::Type*> Fields; + const llvm::Type *Ty = CGT.ConvertType(FieldDecls[PrimaryEltNo]->getType()); + Fields.push_back(Ty); + STy = llvm::StructType::get(Fields); +} + +/// placeBitField - Find a place for FD, which is a bit-field. +/// This function searches for the last aligned field. If the bit-field fits in +/// it, it is reused. Otherwise, the bit-field is placed in a new field. 
+void RecordOrganizer::placeBitField(const FieldDecl *FD) { + + assert (FD->isBitField() && "FD is not a bit-field"); + Expr *BitWidth = FD->getBitWidth(); + llvm::APSInt FieldSize(32); + bool isBitField = + BitWidth->isIntegerConstantExpr(FieldSize, CGT.getContext()); + assert (isBitField && "Invalid BitField size expression"); + uint64_t BitFieldSize = FieldSize.getZExtValue(); + + const llvm::Type *Ty = CGT.ConvertType(FD->getType()); + uint64_t TySize = CGT.getTargetData().getABITypeSizeInBits(Ty); + + unsigned Idx = Cursor / TySize; + unsigned BitsLeft = TySize - (Cursor % TySize); + + if (BitsLeft >= BitFieldSize) { + // The bitfield fits in the last aligned field. + // This is : struct { char a; int CurrentField:10;}; + // where 'CurrentField' shares first field with 'a'. + CGT.addFieldInfo(FD, Idx); + CGT.addBitFieldInfo(FD, TySize - BitsLeft, BitFieldSize); + Cursor += BitFieldSize; + } else { + // Place the bitfield in a new LLVM field. + // This is : struct { char a; short CurrentField:10;}; + // where 'CurrentField' needs a new llvm field. + CGT.addFieldInfo(FD, Idx + 1); + CGT.addBitFieldInfo(FD, 0, BitFieldSize); + Cursor = (Idx + 1) * TySize + BitFieldSize; + } + if (Cursor > llvmSize) + addPaddingFields(Cursor); +} diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h new file mode 100644 index 00000000000..08a2467106a --- /dev/null +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -0,0 +1,165 @@ +//===--- CodeGenTypes.h - Type translation for LLVM CodeGen -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the code that handles AST -> LLVM type lowering. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CODEGENTYPES_H +#define CLANG_CODEGEN_CODEGENTYPES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include <vector> + +namespace llvm { + class Module; + class Type; + class PATypeHolder; + class TargetData; +} + +namespace clang { + class ASTContext; + class TagDecl; + class TargetInfo; + class QualType; + class Type; + class FunctionTypeProto; + class FieldDecl; + class RecordDecl; + +namespace CodeGen { + class CodeGenTypes; + + /// CGRecordLayout - This class handles struct and union layout info while + /// lowering AST types to LLVM types. + class CGRecordLayout { + CGRecordLayout(); // DO NOT IMPLEMENT + public: + CGRecordLayout(llvm::Type *T, llvm::SmallSet<unsigned, 8> &PF) + : STy(T), PaddingFields(PF) { + // FIXME : Collect info about fields that requires adjustments + // (i.e. fields that do not directly map to llvm struct fields.) + } + + /// getLLVMType - Return llvm type associated with this record. + llvm::Type *getLLVMType() const { + return STy; + } + + bool isPaddingField(unsigned No) const { + return PaddingFields.count(No) != 0; + } + + unsigned getNumPaddingFields() { + return PaddingFields.size(); + } + + private: + llvm::Type *STy; + llvm::SmallSet<unsigned, 8> PaddingFields; + }; + +/// CodeGenTypes - This class organizes the cross-module state that is used +/// while lowering AST types to LLVM types. +class CodeGenTypes { + ASTContext &Context; + TargetInfo &Target; + llvm::Module& TheModule; + const llvm::TargetData& TheTargetData; + + llvm::DenseMap<const TagDecl*, llvm::PATypeHolder> TagDeclTypes; + + /// CGRecordLayouts - This maps llvm struct type with corresponding + /// record layout info. + /// FIXME : If CGRecordLayout is less than 16 bytes then use + /// inline it in the map. 
+ llvm::DenseMap<const TagDecl*, CGRecordLayout *> CGRecordLayouts; + + /// FieldInfo - This maps struct field with corresponding llvm struct type + /// field no. This info is populated by record organizer. + llvm::DenseMap<const FieldDecl *, unsigned> FieldInfo; + +public: + class BitFieldInfo { + public: + explicit BitFieldInfo(unsigned short B, unsigned short S) + : Begin(B), Size(S) {} + + unsigned short Begin; + unsigned short Size; + }; + +private: + llvm::DenseMap<const FieldDecl *, BitFieldInfo> BitFields; + + /// TypeCache - This map keeps cache of llvm::Types (through PATypeHolder) + /// and maps llvm::Types to corresponding clang::Type. llvm::PATypeHolder is + /// used instead of llvm::Type because it allows us to bypass potential + /// dangling type pointers due to type refinement on llvm side. + llvm::DenseMap<Type *, llvm::PATypeHolder> TypeCache; + + /// ConvertNewType - Convert type T into a llvm::Type. Do not use this + /// method directly because it does not do any type caching. This method + /// is available only for ConvertType(). CovertType() is preferred + /// interface to convert type T into a llvm::Type. + const llvm::Type *ConvertNewType(QualType T); +public: + CodeGenTypes(ASTContext &Ctx, llvm::Module &M, const llvm::TargetData &TD); + ~CodeGenTypes(); + + const llvm::TargetData &getTargetData() const { return TheTargetData; } + TargetInfo &getTarget() const { return Target; } + ASTContext &getContext() const { return Context; } + + /// ConvertType - Convert type T into a llvm::Type. + const llvm::Type *ConvertType(QualType T); + + /// ConvertTypeForMem - Convert type T into a llvm::Type. This differs from + /// ConvertType in that it is used to convert to the memory representation for + /// a type. For example, the scalar representation for _Bool is i1, but the + /// memory representation is usually i8 or i32, depending on the target. 
+ const llvm::Type *ConvertTypeForMem(QualType T); + + + const CGRecordLayout *getCGRecordLayout(const TagDecl*) const; + + /// getLLVMFieldNo - Return llvm::StructType element number + /// that corresponds to the field FD. + unsigned getLLVMFieldNo(const FieldDecl *FD); + + + /// UpdateCompletedType - When we find the full definition for a TagDecl, + /// replace the 'opaque' type we previously made for it if applicable. + void UpdateCompletedType(const TagDecl *TD); + +public: // These are internal details of CGT that shouldn't be used externally. + void DecodeArgumentTypes(const FunctionTypeProto &FTP, + std::vector<const llvm::Type*> &ArgTys); + + /// addFieldInfo - Assign field number to field FD. + void addFieldInfo(const FieldDecl *FD, unsigned No); + + /// addBitFieldInfo - Assign a start bit and a size to field FD. + void addBitFieldInfo(const FieldDecl *FD, unsigned Begin, unsigned Size); + + /// getBitFieldInfo - Return the BitFieldInfo that corresponds to the field + /// FD. + BitFieldInfo getBitFieldInfo(const FieldDecl *FD); + + /// ConvertTagDeclType - Lay out a tagged decl type like struct or union or + /// enum. + const llvm::Type *ConvertTagDeclType(const TagDecl *TD); +}; + +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/clang/lib/CodeGen/Makefile b/clang/lib/CodeGen/Makefile new file mode 100644 index 00000000000..4d7828ef67a --- /dev/null +++ b/clang/lib/CodeGen/Makefile @@ -0,0 +1,23 @@ +##===- clang/lib/CodeGen/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the AST -> LLVM code generation library for the +# C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME := clangCodeGen +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp new file mode 100644 index 00000000000..06467488a5e --- /dev/null +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -0,0 +1,104 @@ +//===--- ModuleBuilder.cpp - Emit LLVM Code from ASTs ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This builds an AST and converts it to LLVM Code. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/ModuleBuilder.h" +#include "CodeGenModule.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// LLVM Emitter + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CodeGen/ModuleBuilder.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" + +namespace { + class CodeGenerator : public ASTConsumer { + Diagnostic &Diags; + const llvm::TargetData *TD; + ASTContext *Ctx; + const LangOptions &Features; + protected: + llvm::Module *&M; + CodeGen::CodeGenModule *Builder; + public: + CodeGenerator(Diagnostic &diags, const LangOptions &LO, + llvm::Module *&DestModule) + : Diags(diags), Features(LO), M(DestModule) {} + + ~CodeGenerator() { + delete Builder; + } + + virtual void Initialize(ASTContext &Context) { + Ctx = &Context; + + M->setTargetTriple(Ctx->Target.getTargetTriple()); + M->setDataLayout(Ctx->Target.getTargetDescription()); + TD = new 
llvm::TargetData(Ctx->Target.getTargetDescription()); + Builder = new CodeGen::CodeGenModule(Context, Features, *M, *TD, Diags); + } + + virtual void HandleTopLevelDecl(Decl *D) { + // If an error occurred, stop code generation, but continue parsing and + // semantic analysis (to ensure all warnings and errors are emitted). + if (Diags.hasErrorOccurred()) + return; + + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + Builder->EmitFunction(FD); + } else if (FileVarDecl *FVD = dyn_cast<FileVarDecl>(D)) { + Builder->EmitGlobalVarDeclarator(FVD); + } else if (LinkageSpecDecl *LSD = dyn_cast<LinkageSpecDecl>(D)) { + if (LSD->getLanguage() == LinkageSpecDecl::lang_cxx) + Builder->WarnUnsupported(LSD, "linkage spec"); + // FIXME: implement C++ linkage, C linkage works mostly by C + // language reuse already. + } else if (FileScopeAsmDecl *AD = dyn_cast<FileScopeAsmDecl>(D)) { + std::string AsmString(AD->getAsmString()->getStrData(), + AD->getAsmString()->getByteLength()); + + const std::string &S = Builder->getModule().getModuleInlineAsm(); + if (S.empty()) + Builder->getModule().setModuleInlineAsm(AsmString); + else + Builder->getModule().setModuleInlineAsm(S + '\n' + AsmString); + } else { + assert(isa<TypeDecl>(D) && "Unknown top level decl"); + // TODO: handle debug info? + } + } + + /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl + /// (e.g. struct, union, enum, class) is completed. This allows the client to + /// hack on the type, which can occur at any point in the file (because these + /// can be defined in declspecs). 
+ virtual void HandleTagDeclDefinition(TagDecl *D) { + Builder->UpdateCompletedType(D); + } + + }; +} + +ASTConsumer *clang::CreateLLVMCodeGen(Diagnostic &Diags, + const LangOptions &Features, + llvm::Module *&DestModule) { + return new CodeGenerator(Diags, Features, DestModule); +} + diff --git a/clang/lib/Headers/Makefile b/clang/lib/Headers/Makefile new file mode 100644 index 00000000000..7af7f0f8700 --- /dev/null +++ b/clang/lib/Headers/Makefile @@ -0,0 +1,39 @@ +##===- clang/lib/Headers/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +include $(LEVEL)/Makefile.common + +HeaderDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/Headers + +HEADERS := $(notdir $(wildcard $(PROJ_SRC_DIR)/*.h)) + +OBJHEADERS := $(addprefix $(HeaderDir)/, $(HEADERS)) + + +$(OBJHEADERS): $(HeaderDir)/%.h: $(PROJ_SRC_DIR)/%.h $(HeaderDir)/.dir + $(Verb) cp $< $@ + @echo Copying $(notdir $<) to build dir + +printit: + echo $(OBJHEADERS) + echo $(PROJ_SRC_DIR) + +# Hook into the standard Makefile rules. 
+all-local:: $(OBJHEADERS) + +PROJ_headers := $(DESTDIR)$(PROJ_prefix)/Headers + +INSTHEADERS := $(addprefix $(PROJ_headers)/, $(HEADERS)) + +$(INSTHEADERS): $(PROJ_headers)/%.h: $(HeaderDir)/%.h $(PROJ_headers)/.dir + $(Verb) $(DataInstall) $< $(PROJ_headers) + +install-local:: $(INSTHEADERS) + diff --git a/clang/lib/Headers/mmintrin.devel.h b/clang/lib/Headers/mmintrin.devel.h new file mode 100644 index 00000000000..70cded027ec --- /dev/null +++ b/clang/lib/Headers/mmintrin.devel.h @@ -0,0 +1,377 @@ +/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== + * + * Copyright (c) 2008 Anders Carlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MMINTRIN_H +#define __MMINTRIN_H + +#ifndef __MMX__ +#error "MMX instruction set not enabled" +#else + +typedef long long __m64 __attribute__((vector_size(8))); + +typedef int __v2si __attribute__((vector_size(8))); +typedef short __v4hi __attribute__((vector_size(8))); +typedef char __v8qi __attribute__((vector_size(8))); + +inline void __attribute__((__always_inline__)) _mm_empty() +{ + __builtin_ia32_emms(); +} + +inline __m64 __attribute__((__always_inline__)) _mm_cvtsi32_si64(int i) +{ + return (__m64)(__v2si){i, 0}; +} + +inline int __attribute__((__always_inline__)) _mm_cvtsi64_si32(__m64 m) +{ + return ((__v2si)m)[0]; +} + +inline __m64 __attribute__((__always_inline__)) _mm_cvtsi64_m64(long long i) +{ + return (__m64)i; +} + +inline long long __attribute__((__always_inline__)) _mm_cvtm64_si64(__m64 m) +{ + return (long long)m; +} + +inline __m64 __attribute__((__always_inline__)) _mm_packs_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_packsswb((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_packs_pi32(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_packssdw((__v2si)m1, (__v2si)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_packs_pu16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_packuswb((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi8(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi16(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi32(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi8(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 
__attribute__((__always_inline__)) _mm_unpacklo_pi16(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi32(__m64 m1, __m64 m2) +{ + // FIXME: use __builtin_shuffle_vector +} + +inline __m64 __attribute__((__always_inline__)) _mm_add_pi8(__m64 m1, __m64 m2) +{ + return (__m64)((__v8qi)m1 + (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_add_pi16(__m64 m1, __m64 m2) +{ + return (__m64)((__v4hi)m1 + (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_add_pi32(__m64 m1, __m64 m2) +{ + return (__m64)((__v2si)m1 + (__v2si)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_adds_pi8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_paddsb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_adds_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_paddsw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_adds_pu8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_paddusb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_adds_pu16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_paddusw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sub_pi8(__m64 m1, __m64 m2) +{ + return (__m64)((__v8qi)m1 - (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sub_pi16(__m64 m1, __m64 m2) +{ + return (__m64)((__v4hi)m1 - (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sub_pi32(__m64 m1, __m64 m2) +{ + return (__m64)((__v2si)m1 - (__v2si)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_subs_pi8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_psubsb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_subs_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_psubsw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 
__attribute__((__always_inline__)) _mm_subs_pu8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_psubusb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_subs_pu16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_psubusw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_madd_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pmaddwd((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_mulhi_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pmulhw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_mullo_pi16(__m64 m1, __m64 m2) +{ + return (__m64)((__v4hi)m1 * (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sll_pi16(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_psllw((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_slli_pi16(__m64 m, int count) +{ + return (__m64)__builtin_ia32_psllwi((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sll_pi32(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_pslld((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_slli_pi32(__m64 m, int count) +{ + return (__m64)__builtin_ia32_pslldi((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sll_pi64(__m64 m, __m64 count) +{ + return __builtin_ia32_psllq(m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_slli_pi64(__m64 m, int count) +{ + return __builtin_ia32_psllqi(m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sra_pi16(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_psraw((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srai_pi16(__m64 m, int count) +{ + return (__m64)__builtin_ia32_psrawi((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_sra_pi32(__m64 m, __m64 count) +{ + return 
(__m64)__builtin_ia32_psrad((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srai_pi32(__m64 m, int count) +{ + return (__m64)__builtin_ia32_psradi((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srl_pi16(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_psrlw((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srli_pi16(__m64 m, int count) +{ + return (__m64)__builtin_ia32_psrlwi((__v4hi)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srl_pi32(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_psrld((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srli_pi32(__m64 m, int count) +{ + return (__m64)__builtin_ia32_psrldi((__v2si)m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srl_pi64(__m64 m, __m64 count) +{ + return (__m64)__builtin_ia32_psrlq(m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_srli_pi64(__m64 m, int count) +{ + return __builtin_ia32_psrlqi(m, count); +} + +inline __m64 __attribute__((__always_inline__)) _mm_and_si64(__m64 m1, __m64 m2) +{ + return m1 & m2; +} + +inline __m64 __attribute__((__always_inline__)) _mm_andnot_si64(__m64 m1, __m64 m2) +{ + return ~m1 & m2; +} + +inline __m64 __attribute__((__always_inline__)) _mm_or_si64(__m64 m1, __m64 m2) +{ + return m1 | m2; +} + +inline __m64 __attribute__((__always_inline__)) _mm_xor_si64(__m64 m1, __m64 m2) +{ + return m1 ^ m2; +} + +inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpeqb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpeqw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi32(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpeqd((__v2si)m1, (__v2si)m2); +} + +inline __m64 
__attribute__((__always_inline__)) _mm_cmpgt_pi8(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpgtb((__v8qi)m1, (__v8qi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi16(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpgtw((__v4hi)m1, (__v4hi)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi32(__m64 m1, __m64 m2) +{ + return (__m64)__builtin_ia32_pcmpgtd((__v2si)m1, (__v2si)m2); +} + +inline __m64 __attribute__((__always_inline__)) _mm_setzero_si64() +{ + return (__m64){ 0LL }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set_pi32(int i1, int i0) +{ + return (__m64)(__v2si){ i0, i1 }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set_pi16(short s3, short s2, short s1, short s0) +{ + return (__m64)(__v4hi){ s0, s1, s2, s3 }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set_pi8(char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) +{ + return (__m64)(__v8qi){ b0, b1, b2, b3, b4, b5, b6, b7 }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set1_pi32(int i) +{ + return (__m64)(__v2si){ i, i }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set1_pi16(short s) +{ + return (__m64)(__v4hi){ s }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_set1_pi8(char b) +{ + return (__m64)(__v8qi){ b }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_setr_pi32(int i1, int i0) +{ + return (__m64)(__v2si){ i1, i0 }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_setr_pi16(short s3, short s2, short s1, short s0) +{ + return (__m64)(__v4hi){ s3, s2, s1, s0 }; +} + +inline __m64 __attribute__((__always_inline__)) _mm_setr_pi8(char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) +{ + return (__m64)(__v8qi){ b7, b6, b5, b4, b3, b2, b1, b0 }; +} + +#endif /* __MMX__ */ + +#endif /* __MMINTRIN_H */ + diff --git a/clang/lib/Headers/stdbool.h b/clang/lib/Headers/stdbool.h new file mode 100644 index 
00000000000..e44a1f9a979 --- /dev/null +++ b/clang/lib/Headers/stdbool.h @@ -0,0 +1,38 @@ +/*===---- stdbool.h - Standard header for booleans -------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDBOOL_H +#define __STDBOOL_H + +/* Don't define bool, true, and false in C++ */ +#ifndef __cplusplus +#define bool _Bool +#define true 1 +#define false 0 +#endif + +#define __bool_true_false_are_defined 1 + +#endif /* __STDBOOL_H */ diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp new file mode 100644 index 00000000000..282e742b4c8 --- /dev/null +++ b/clang/lib/Lex/HeaderMap.cpp @@ -0,0 +1,242 @@ +//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HeaderMap interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/HeaderMap.h" +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Data Structures and Manifest Constants +//===----------------------------------------------------------------------===// + +enum { + HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', + HMAP_HeaderVersion = 1, + + HMAP_EmptyBucketKey = 0 +}; + +namespace clang { +struct HMapBucket { + uint32_t Key; // Offset (into strings) of key. + + uint32_t Prefix; // Offset (into strings) of value prefix. + uint32_t Suffix; // Offset (into strings) of value suffix. +}; + +struct HMapHeader { + uint32_t Magic; // Magic word, also indicates byte order. + uint16_t Version; // Version number -- currently 1. 
+ uint16_t Reserved; // Reserved for future use - zero for now. + uint32_t StringsOffset; // Offset to start of string pool. + uint32_t NumEntries; // Number of entries in the string table. + uint32_t NumBuckets; // Number of buckets (always a power of 2). + uint32_t MaxValueLength; // Length of longest result path (excluding nul). + // An array of 'NumBuckets' HMapBucket objects follows this header. + // Strings follow the buckets, at StringsOffset. +}; +} // end namespace clang. + +/// HashHMapKey - This is the 'well known' hash function required by the file +/// format, used to look up keys in the hash table. The hash table uses simple +/// linear probing based on this function. +static inline unsigned HashHMapKey(const char *S, const char *End) { + unsigned Result = 0; + + for (; S != End; S++) + Result += tolower(*S) * 13; + return Result; +} + + + +//===----------------------------------------------------------------------===// +// Verification and Construction +//===----------------------------------------------------------------------===// + +/// HeaderMap::Create - This attempts to load the specified file as a header +/// map. If it doesn't look like a HeaderMap, it gives up and returns null. +/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason +/// into the string error argument and returns null. +const HeaderMap *HeaderMap::Create(const FileEntry *FE) { + // If the file is too small to be a header map, ignore it. + unsigned FileSize = FE->getSize(); + if (FileSize <= sizeof(HMapHeader)) return 0; + + llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer( + llvm::MemoryBuffer::getFile(FE->getName(), strlen(FE->getName()), 0, + FE->getSize())); + if (FileBuffer == 0) return 0; // Unreadable file? + const char *FileStart = FileBuffer->getBufferStart(); + + // We know the file is at least as big as the header, check it now. 
+ const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart); + + // Sniff it to see if it's a headermap by checking the magic number and + // version. + bool NeedsByteSwap; + if (Header->Magic == HMAP_HeaderMagicNumber && + Header->Version == HMAP_HeaderVersion) + NeedsByteSwap = false; + else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) && + Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion)) + NeedsByteSwap = true; // Mixed endianness headermap. + else + return 0; // Not a header map. + + if (Header->Reserved != 0) return 0; + + // Okay, everything looks good, create the header map. + return new HeaderMap(FileBuffer.take(), NeedsByteSwap); +} + +HeaderMap::~HeaderMap() { + delete FileBuffer; +} + +//===----------------------------------------------------------------------===// +// Utility Methods +//===----------------------------------------------------------------------===// + + +/// getFileName - Return the filename of the headermap. +const char *HeaderMap::getFileName() const { + return FileBuffer->getBufferIdentifier(); +} + +unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const { + if (!NeedsBSwap) return X; + return llvm::ByteSwap_32(X); +} + +/// getHeader - Return a reference to the file header, in unbyte-swapped form. +/// This method cannot fail. +const HMapHeader &HeaderMap::getHeader() const { + // We know the file is at least as big as the header. Return it. + return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart()); +} + +/// getBucket - Return the specified hash table bucket from the header map, +/// bswap'ing its fields as appropriate. If the bucket number is not valid, +/// this return a bucket with an empty key (0). 
+HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { + HMapBucket Result; + Result.Key = HMAP_EmptyBucketKey; + + const HMapBucket *BucketArray = + reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() + + sizeof(HMapHeader)); + + const HMapBucket *BucketPtr = BucketArray+BucketNo; + if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) + return Result; // Invalid buffer, corrupt hmap. + + // Otherwise, the bucket is valid. Load the values, bswapping as needed. + Result.Key = getEndianAdjustedWord(BucketPtr->Key); + Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix); + Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix); + return Result; +} + +/// getString - Look up the specified string in the string table. If the string +/// index is not valid, it returns an empty string. +const char *HeaderMap::getString(unsigned StrTabIdx) const { + // Add the start of the string table to the idx. + StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); + + // Check for invalid index. + if (StrTabIdx >= FileBuffer->getBufferSize()) + return 0; + + // Otherwise, we have a valid pointer into the file. Just return it. We know + // that the "string" can not overrun the end of the file, because the buffer + // is nul terminated by virtue of being a MemoryBuffer. + return FileBuffer->getBufferStart()+StrTabIdx; +} + +/// StringsEqualWithoutCase - Compare the specified two strings for case- +/// insensitive equality, returning true if they are equal. Both strings are +/// known to have the same length. +static bool StringsEqualWithoutCase(const char *S1, const char *S2, + unsigned Len) { + for (; Len; ++S1, ++S2, --Len) + if (tolower(*S1) != tolower(*S2)) + return false; + return true; +} + +//===----------------------------------------------------------------------===// +// The Main Drivers +//===----------------------------------------------------------------------===// + +/// dump - Print the contents of this headermap to stderr. 
+void HeaderMap::dump() const { + const HMapHeader &Hdr = getHeader(); + unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); + + fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n", + getFileName(), NumBuckets, + getEndianAdjustedWord(Hdr.NumEntries)); + + for (unsigned i = 0; i != NumBuckets; ++i) { + HMapBucket B = getBucket(i); + if (B.Key == HMAP_EmptyBucketKey) continue; + + const char *Key = getString(B.Key); + const char *Prefix = getString(B.Prefix); + const char *Suffix = getString(B.Suffix); + fprintf(stderr, " %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix); + } +} + +/// LookupFile - Check to see if the specified relative filename is located in +/// this HeaderMap. If so, open it and return its FileEntry. +const FileEntry *HeaderMap::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + FileManager &FM) const { + const HMapHeader &Hdr = getHeader(); + unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); + + // If the number of buckets is not a power of two, the headermap is corrupt. + // Don't probe infinitely. + if (NumBuckets & (NumBuckets-1)) + return 0; + + // Linearly probe the hash table. + for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) { + HMapBucket B = getBucket(Bucket & (NumBuckets-1)); + if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss. + + // See if the key matches. If not, probe on. + const char *Key = getString(B.Key); + unsigned BucketKeyLen = strlen(Key); + if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart)) + continue; + + // See if the actual strings equal. + if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen)) + continue; + + // If so, we have a match in the hash table. Construct the destination + // path. 
+ llvm::SmallString<1024> DestPath; + DestPath += getString(B.Prefix); + DestPath += getString(B.Suffix); + return FM.getFile(DestPath.begin(), DestPath.end()); + } +} diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp new file mode 100644 index 00000000000..44ae35c8b7e --- /dev/null +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -0,0 +1,425 @@ +//===--- HeaderSearch.cpp - Resolve Header File Locations ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DirectoryLookup and HeaderSearch interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderMap.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/IdentifierTable.h" +#include "llvm/System/Path.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) { + SystemDirIdx = 0; + NoCurDirSearch = false; + + NumIncluded = 0; + NumMultiIncludeFileOptzn = 0; + NumFrameworkLookups = NumSubFrameworkLookups = 0; +} + +HeaderSearch::~HeaderSearch() { + // Delete headermaps. 
+ for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) + delete HeaderMaps[i].second; +} + +void HeaderSearch::PrintStats() { + fprintf(stderr, "\n*** HeaderSearch Stats:\n"); + fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size()); + unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0; + for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) { + NumOnceOnlyFiles += FileInfo[i].isImport; + if (MaxNumIncludes < FileInfo[i].NumIncludes) + MaxNumIncludes = FileInfo[i].NumIncludes; + NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1; + } + fprintf(stderr, " %d #import/#pragma once files.\n", NumOnceOnlyFiles); + fprintf(stderr, " %d included exactly once.\n", NumSingleIncludedFiles); + fprintf(stderr, " %d max times a file is included.\n", MaxNumIncludes); + + fprintf(stderr, " %d #include/#include_next/#import.\n", NumIncluded); + fprintf(stderr, " %d #includes skipped due to" + " the multi-include optimization.\n", NumMultiIncludeFileOptzn); + + fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups); + fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups); +} + +/// CreateHeaderMap - This method returns a HeaderMap for the specified +/// FileEntry, uniquing them through the the 'HeaderMaps' datastructure. +const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { + // We expect the number of headermaps to be small, and almost always empty. + // If it ever grows, use of a linear search should be re-evaluated. + if (!HeaderMaps.empty()) { + for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) + // Pointer equality comparison of FileEntries works because they are + // already uniqued by inode. 
+ if (HeaderMaps[i].first == FE) + return HeaderMaps[i].second; + } + + if (const HeaderMap *HM = HeaderMap::Create(FE)) { + HeaderMaps.push_back(std::make_pair(FE, HM)); + return HM; + } + + return 0; +} + +//===----------------------------------------------------------------------===// +// File lookup within a DirectoryLookup scope +//===----------------------------------------------------------------------===// + +/// getName - Return the directory or filename corresponding to this lookup +/// object. +const char *DirectoryLookup::getName() const { + if (isNormalDir()) + return getDir()->getName(); + if (isFramework()) + return getFrameworkDir()->getName(); + assert(isHeaderMap() && "Unknown DirectoryLookup"); + return getHeaderMap()->getFileName(); +} + + +/// LookupFile - Lookup the specified file in this search path, returning it +/// if it exists or returning null if not. +const FileEntry *DirectoryLookup::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + HeaderSearch &HS) const { + llvm::SmallString<1024> TmpDir; + if (isNormalDir()) { + // Concatenate the requested file onto the directory. + // FIXME: Portability. Filename concatenation should be in sys::Path. + TmpDir += getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end()); + } + + if (isFramework()) + return DoFrameworkLookup(FilenameStart, FilenameEnd, HS); + + assert(isHeaderMap() && "Unknown directory lookup"); + return getHeaderMap()->LookupFile(FilenameStart, FilenameEnd,HS.getFileMgr()); +} + + +/// DoFrameworkLookup - Do a lookup of the specified file in the current +/// DirectoryLookup, which is a framework directory. +const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart, + const char *FilenameEnd, + HeaderSearch &HS) const { + FileManager &FileMgr = HS.getFileMgr(); + + // Framework names must have a '/' in the filename. 
+ const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + // Find out if this is the home for the specified framework, by checking + // HeaderSearch. Possible answer are yes/no and unknown. + const DirectoryEntry *&FrameworkDirCache = + HS.LookupFrameworkCache(FilenameStart, SlashPos); + + // If it is known and in some other directory, fail. + if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir()) + return 0; + + // Otherwise, construct the path to this framework dir. + + // FrameworkName = "/System/Library/Frameworks/" + llvm::SmallString<1024> FrameworkName; + FrameworkName += getFrameworkDir()->getName(); + if (FrameworkName.empty() || FrameworkName.back() != '/') + FrameworkName.push_back('/'); + + // FrameworkName = "/System/Library/Frameworks/Cocoa" + FrameworkName.append(FilenameStart, SlashPos); + + // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/" + FrameworkName += ".framework/"; + + // If the cache entry is still unresolved, query to see if the cache entry is + // still unresolved. If so, check its existence now. + if (FrameworkDirCache == 0) { + HS.IncrementFrameworkLookupCount(); + + // If the framework dir doesn't exist, we fail. + // FIXME: It's probably more efficient to query this with FileMgr.getDir. + if (!llvm::sys::Path(std::string(FrameworkName.begin(), + FrameworkName.end())).exists()) + return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. 
+ FrameworkDirCache = getFrameworkDir(); + } + + // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" + unsigned OrigSize = FrameworkName.size(); + + FrameworkName += "Headers/"; + FrameworkName.append(SlashPos+1, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(), + FrameworkName.end())) { + return FE; + } + + // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" + const char *Private = "Private"; + FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, + Private+strlen(Private)); + return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end()); +} + + +//===----------------------------------------------------------------------===// +// Header File Location. +//===----------------------------------------------------------------------===// + + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if +/// non-null, indicates where the #including file is, in case a relative search +/// is needed. +const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir, + const FileEntry *CurFileEnt) { + // If 'Filename' is absolute, check to see if it exists and no searching. + // FIXME: Portability. This should be a sys::Path interface, this doesn't + // handle things like C:\foo.txt right, nor win32 \\network\device\blah. + if (FilenameStart[0] == '/') { + CurDir = 0; + + // If this was an #include_next "/absolute/file", fail. + if (FromDir) return 0; + + // Otherwise, just return the file. + return FileMgr.getFile(FilenameStart, FilenameEnd); + } + + // Step #0, unless disabled, check to see if the file is in the #includer's + // directory. 
This has to be based on CurFileEnt, not CurDir, because + // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and + // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h". + // This search is not done for <> headers. + if (CurFileEnt && !isAngled && !NoCurDirSearch) { + llvm::SmallString<1024> TmpDir; + // Concatenate the requested file onto the directory. + // FIXME: Portability. Filename concatenation should be in sys::Path. + TmpDir += CurFileEnt->getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end())) { + // Leave CurDir unset. + // This file is a system header or C++ unfriendly if the old file is. + // + // Note that the temporary 'DirInfo' is required here, as either call to + // getFileInfo could resize the vector and we don't want to rely on order + // of evaluation. + unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo; + getFileInfo(FE).DirInfo = DirInfo; + return FE; + } + } + + CurDir = 0; + + // If this is a system #include, ignore the user #include locs. + unsigned i = isAngled ? SystemDirIdx : 0; + + // If this is a #include_next request, start searching after the directory the + // file was found in. + if (FromDir) + i = FromDir-&SearchDirs[0]; + + // Cache all of the lookups performed by this method. Many headers are + // multiply included, and the "pragma once" optimization prevents them from + // being relex/pp'd, but they would still have to search through a + // (potentially huge) series of SearchDirs to find it. + std::pair<unsigned, unsigned> &CacheLookup = + LookupFileCache.GetOrCreateValue(FilenameStart, FilenameEnd).getValue(); + + // If the entry has been previously looked up, the first value will be + // non-zero. If the value is equal to i (the start point of our search), then + // this is a matching hit. 
+ if (CacheLookup.first == i+1) { + // Skip querying potentially lots of directories for this lookup. + i = CacheLookup.second; + } else { + // Otherwise, this is the first query, or the previous query didn't match + // our search start. We will fill in our found location below, so prime the + // start point value. + CacheLookup.first = i+1; + } + + // Check each directory in sequence to see if it contains this file. + for (; i != SearchDirs.size(); ++i) { + const FileEntry *FE = + SearchDirs[i].LookupFile(FilenameStart, FilenameEnd, *this); + if (!FE) continue; + + CurDir = &SearchDirs[i]; + + // This file is a system header or C++ unfriendly if the dir is. + getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic(); + + // Remember this location for the next lookup we do. + CacheLookup.second = i; + return FE; + } + + // Otherwise, didn't find it. Remember we didn't find this. + CacheLookup.second = SearchDirs.size(); + return 0; +} + +/// LookupSubframeworkHeader - Look up a subframework for the specified +/// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from +/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox +/// is a subframework within Carbon.framework. If so, return the FileEntry +/// for the designated file, otherwise return null. +const FileEntry *HeaderSearch:: +LookupSubframeworkHeader(const char *FilenameStart, + const char *FilenameEnd, + const FileEntry *ContextFileEnt) { + assert(ContextFileEnt && "No context file?"); + + // Framework names must have a '/' in the filename. Find it. + const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + // Look up the base framework name of the ContextFileEnt. + const char *ContextName = ContextFileEnt->getName(); + + // If the context info wasn't a framework, couldn't be a subframework. 
+ const char *FrameworkPos = strstr(ContextName, ".framework/"); + if (FrameworkPos == 0) + return 0; + + llvm::SmallString<1024> FrameworkName(ContextName, + FrameworkPos+strlen(".framework/")); + + // Append Frameworks/HIToolbox.framework/ + FrameworkName += "Frameworks/"; + FrameworkName.append(FilenameStart, SlashPos); + FrameworkName += ".framework/"; + + llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = + FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos); + + // Some other location? + if (CacheLookup.getValue() && + CacheLookup.getKeyLength() == FrameworkName.size() && + memcmp(CacheLookup.getKeyData(), &FrameworkName[0], + CacheLookup.getKeyLength()) != 0) + return 0; + + // Cache subframework. + if (CacheLookup.getValue() == 0) { + ++NumSubFrameworkLookups; + + // If the framework dir doesn't exist, we fail. + const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(), + FrameworkName.end()); + if (Dir == 0) return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. + CacheLookup.setValue(Dir); + } + + const FileEntry *FE = 0; + + // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" + llvm::SmallString<1024> HeadersFilename(FrameworkName); + HeadersFilename += "Headers/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), + HeadersFilename.end()))) { + + // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" + HeadersFilename = FrameworkName; + HeadersFilename += "PrivateHeaders/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end()))) + return 0; + } + + // This file is a system header or C++ unfriendly if the old file is. + // + // Note that the temporary 'DirInfo' is required here, as either call to + // getFileInfo could resize the vector and we don't want to rely on order + // of evaluation. 
+ unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo; + getFileInfo(FE).DirInfo = DirInfo; + return FE; +} + +//===----------------------------------------------------------------------===// +// File Info Management. +//===----------------------------------------------------------------------===// + + +/// getFileInfo - Return the PerFileInfo structure for the specified +/// FileEntry. +HeaderSearch::PerFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { + if (FE->getUID() >= FileInfo.size()) + FileInfo.resize(FE->getUID()+1); + return FileInfo[FE->getUID()]; +} + +/// ShouldEnterIncludeFile - Mark the specified file as a target of of a +/// #include, #include_next, or #import directive. Return false if #including +/// the file will have no effect or true if we should include it. +bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ + ++NumIncluded; // Count # of attempted #includes. + + // Get information about this file. + PerFileInfo &FileInfo = getFileInfo(File); + + // If this is a #import directive, check that we have not already imported + // this header. + if (isImport) { + // If this has already been imported, don't import it again. + FileInfo.isImport = true; + + // Has this already been #import'ed or #include'd? + if (FileInfo.NumIncludes) return false; + } else { + // Otherwise, if this is a #include of a file that was previously #import'd + // or if this is the second #include of a #pragma once file, ignore it. + if (FileInfo.isImport) + return false; + } + + // Next, check to see if the file is wrapped with #ifndef guards. If so, and + // if the macro that guards it is defined, we know the #include has no effect. + if (FileInfo.ControllingMacro && + FileInfo.ControllingMacro->hasMacroDefinition()) { + ++NumMultiIncludeFileOptzn; + return false; + } + + // Increment the number of times this file has been included. 
+ ++FileInfo.NumIncludes; + + return true; +} + + diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp new file mode 100644 index 00000000000..98bbb386305 --- /dev/null +++ b/clang/lib/Lex/Lexer.cpp @@ -0,0 +1,1661 @@ +//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Lexer and Token interfaces. +// +//===----------------------------------------------------------------------===// +// +// TODO: GCC Diagnostics emitted by the lexer: +// PEDWARN: (form feed|vertical tab) in preprocessing directive +// +// Universal characters, unicode, char mapping: +// WARNING: `%.*s' is not in NFKC +// WARNING: `%.*s' is not in NFC +// +// Other: +// TODO: Options to support: +// -fexec-charset,-fwide-exec-charset +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cctype> +using namespace clang; + +static void InitCharacterInfo(); + +//===----------------------------------------------------------------------===// +// Token Class Implementation +//===----------------------------------------------------------------------===// + +/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. +bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { + return is(tok::identifier) && + getIdentifierInfo()->getObjCKeywordID() == objcKey; +} + +/// getObjCKeywordID - Return the ObjC keyword kind. 
+tok::ObjCKeywordKind Token::getObjCKeywordID() const { + IdentifierInfo *specId = getIdentifierInfo(); + return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; +} + +/// isNamedIdentifier - Return true if this token is a ppidentifier with the +/// specified name. For example, tok.isNamedIdentifier("this"). +bool Token::isNamedIdentifier(const char *Name) const { + return IdentInfo && !strcmp(IdentInfo->getName(), Name); +} + + +//===----------------------------------------------------------------------===// +// Lexer Class Implementation +//===----------------------------------------------------------------------===// + + +/// Lexer constructor - Create a new lexer object for the specified buffer +/// with the specified preprocessor managing the lexing process. This lexer +/// assumes that the associated file buffer and Preprocessor objects will +/// outlive it, so it doesn't take ownership of either of them. +Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp, + const char *BufStart, const char *BufEnd) + : FileLoc(fileloc), PP(&pp), Features(pp.getLangOptions()) { + + SourceManager &SourceMgr = PP->getSourceManager(); + unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID(); + const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID); + + Is_PragmaLexer = false; + InitCharacterInfo(); + + // BufferStart must always be InputFile->getBufferStart(). + BufferStart = InputFile->getBufferStart(); + + // BufferPtr and BufferEnd can start out somewhere inside the current buffer. + // If unspecified, they starts at the start/end of the buffer. + BufferPtr = BufStart ? BufStart : BufferStart; + BufferEnd = BufEnd ? BufEnd : InputFile->getBufferEnd(); + + assert(BufferEnd[0] == 0 && + "We assume that the input buffer has a null character at the end" + " to simplify lexing!"); + + // Start of the file is a start of line. + IsAtStartOfLine = true; + + // We are not after parsing a #. 
+ ParsingPreprocessorDirective = false; + + // We are not after parsing #include. + ParsingFilename = false; + + // We are not in raw mode. Raw mode disables diagnostics and interpretation + // of tokens (e.g. identifiers, thus disabling macro expansion). It is used + // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block + // or otherwise skipping over tokens. + LexingRawMode = false; + + // Default to keeping comments if requested. + KeepCommentMode = PP->getCommentRetentionState(); +} + +/// Lexer constructor - Create a new raw lexer object. This object is only +/// suitable for calls to 'LexRawToken'. This lexer assumes that the +/// associated file buffer will outlive it, so it doesn't take ownership of +/// either of them. +Lexer::Lexer(SourceLocation fileloc, const LangOptions &features, + const char *BufStart, const char *BufEnd) + : FileLoc(fileloc), PP(0), Features(features) { + Is_PragmaLexer = false; + InitCharacterInfo(); + + BufferStart = BufStart; + BufferPtr = BufStart; + BufferEnd = BufEnd; + + assert(BufferEnd[0] == 0 && + "We assume that the input buffer has a null character at the end" + " to simplify lexing!"); + + // Start of the file is a start of line. + IsAtStartOfLine = true; + + // We are not after parsing a #. + ParsingPreprocessorDirective = false; + + // We are not after parsing #include. + ParsingFilename = false; + + // We *are* in raw mode. + LexingRawMode = true; + + // Never keep comments in raw mode. + KeepCommentMode = false; +} + + +/// Stringify - Convert the specified string into a C string, with surrounding +/// ""'s, and with escaped \ and " characters. +std::string Lexer::Stringify(const std::string &Str, bool Charify) { + std::string Result = Str; + char Quote = Charify ? 
'\'' : '"'; + for (unsigned i = 0, e = Result.size(); i != e; ++i) { + if (Result[i] == '\\' || Result[i] == Quote) { + Result.insert(Result.begin()+i, '\\'); + ++i; ++e; + } + } + return Result; +} + +/// Stringify - Convert the specified string into a C string by escaping '\' +/// and " characters. This does not add surrounding ""'s to the string. +void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] == '\\' || Str[i] == '"') { + Str.insert(Str.begin()+i, '\\'); + ++i; ++e; + } + } +} + + +/// MeasureTokenLength - Relex the token at the specified location and return +/// its length in bytes in the input file. If the token needs cleaning (e.g. +/// includes a trigraph or an escaped newline) then this count includes bytes +/// that are part of that. +unsigned Lexer::MeasureTokenLength(SourceLocation Loc, + const SourceManager &SM) { + // If this comes from a macro expansion, we really do want the macro name, not + // the token this macro expanded to. + Loc = SM.getLogicalLoc(Loc); + + const char *StrData = SM.getCharacterData(Loc); + + // TODO: this could be special cased for common tokens like identifiers, ')', + // etc to make this faster, if it mattered. Just look at StrData[0] to handle + // all obviously single-char tokens. This could use + // Lexer::isObviouslySimpleCharacter for example to handle identifiers or + // something. + + + const char *BufEnd = SM.getBufferData(Loc.getFileID()).second; + + // Create a langops struct and enable trigraphs. This is sufficient for + // measuring tokens. + LangOptions LangOpts; + LangOpts.Trigraphs = true; + + // Create a lexer starting at the beginning of this token. + Lexer TheLexer(Loc, LangOpts, StrData, BufEnd); + Token TheTok; + TheLexer.LexRawToken(TheTok); + return TheTok.getLength(); +} + +//===----------------------------------------------------------------------===// +// Character information. 
//===----------------------------------------------------------------------===//

/// Classification bits stored in CharInfo.  A character carries at most one
/// of these bits; the predicates below test for unions of them.
enum {
  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
  CHAR_LETTER   = 0x04,  // a-z,A-Z
  CHAR_NUMBER   = 0x08,  // 0-9
  CHAR_UNDER    = 0x10,  // _
  CHAR_PERIOD   = 0x20   // .
};

/// CharInfo - Per-character classification table, indexed by unsigned char.
/// It is initialized statically so that the predicates below are correct
/// before any Lexer has been constructed.  Only the ASCII half is spelled
/// out; the remaining entries (0x80-0xFF) are zero-initialized, i.e. carry no
/// classification bits.
static const unsigned char CharInfo[256] = {
  // 0x00 - 0x07: NUL SOH STX ETX EOT ENQ ACK BEL
  0, 0, 0, 0, 0, 0, 0, 0,
  // 0x08 - 0x0F: BS  \t  \n  \v  \f  \r  SO  SI
  0, CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
  CHAR_HORZ_WS, CHAR_VERT_WS, 0, 0,
  // 0x10 - 0x17
  0, 0, 0, 0, 0, 0, 0, 0,
  // 0x18 - 0x1F
  0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20 - 0x27: ' ' ! " # $ % & '
  CHAR_HORZ_WS, 0, 0, 0, 0, 0, 0, 0,
  // 0x28 - 0x2F: ( ) * + , - . /
  0, 0, 0, 0, 0, 0, CHAR_PERIOD, 0,
  // 0x30 - 0x37: 0 1 2 3 4 5 6 7
  CHAR_NUMBER, CHAR_NUMBER, CHAR_NUMBER, CHAR_NUMBER,
  CHAR_NUMBER, CHAR_NUMBER, CHAR_NUMBER, CHAR_NUMBER,
  // 0x38 - 0x3F: 8 9 : ; < = > ?
  CHAR_NUMBER, CHAR_NUMBER, 0, 0, 0, 0, 0, 0,
  // 0x40 - 0x47: @ A B C D E F G
  0, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x48 - 0x4F: H I J K L M N O
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x50 - 0x57: P Q R S T U V W
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x58 - 0x5F: X Y Z [ \ ] ^ _
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, 0,
  0, 0, 0, CHAR_UNDER,
  // 0x60 - 0x67: ` a b c d e f g
  0, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x68 - 0x6F: h i j k l m n o
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x70 - 0x77: p q r s t u v w
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, CHAR_LETTER,
  // 0x78 - 0x7F: x y z { | } ~ DEL
  CHAR_LETTER, CHAR_LETTER, CHAR_LETTER, 0,
  0, 0, 0, 0
};

/// InitCharacterInfo - Kept so that existing callers keep compiling.  The
/// CharInfo table is now a constant initialized at compile time, so there is
/// nothing left to do at run time.
static void InitCharacterInfo() {
}

/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {
  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
}

/// isHorizontalWhitespace - Return true if this character is horizontal
/// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0'.
static inline bool isHorizontalWhitespace(unsigned char c) {
  return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
}

/// isWhitespace - Return true if this character is horizontal or vertical
/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
/// for '\0'.
static inline bool isWhitespace(unsigned char c) {
  return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
}

/// isNumberBody - Return true if this is the body character of an
/// preprocessing number, which is [a-zA-Z0-9_.].
+static inline bool isNumberBody(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ? + true : false; +} + + +//===----------------------------------------------------------------------===// +// Diagnostics forwarding code. +//===----------------------------------------------------------------------===// + +/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the +/// lexer buffer was all instantiated at a single point, perform the mapping. +/// This is currently only used for _Pragma implementation, so it is the slow +/// path of the hot getSourceLocation method. Do not allow it to be inlined. +static SourceLocation GetMappedTokenLoc(Preprocessor &PP, + SourceLocation FileLoc, + unsigned CharNo) DISABLE_INLINE; +static SourceLocation GetMappedTokenLoc(Preprocessor &PP, + SourceLocation FileLoc, + unsigned CharNo) { + // Otherwise, we're lexing "mapped tokens". This is used for things like + // _Pragma handling. Combine the instantiation location of FileLoc with the + // physical location. + SourceManager &SourceMgr = PP.getSourceManager(); + + // Create a new SLoc which is expanded from logical(FileLoc) but whose + // characters come from phys(FileLoc)+Offset. + SourceLocation VirtLoc = SourceMgr.getLogicalLoc(FileLoc); + SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(FileLoc); + PhysLoc = SourceLocation::getFileLoc(PhysLoc.getFileID(), CharNo); + return SourceMgr.getInstantiationLoc(PhysLoc, VirtLoc); +} + +/// getSourceLocation - Return a source location identifier for the specified +/// offset in the current file. +SourceLocation Lexer::getSourceLocation(const char *Loc) const { + assert(Loc >= BufferStart && Loc <= BufferEnd && + "Location out of range for this buffer!"); + + // In the normal case, we're just lexing from a simple file buffer, return + // the file id from FileLoc with the offset specified. 
+ unsigned CharNo = Loc-BufferStart; + if (FileLoc.isFileID()) + return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo); + + assert(PP && "This doesn't work on raw lexers"); + return GetMappedTokenLoc(*PP, FileLoc, CharNo); +} + +/// Diag - Forwarding function for diagnostics. This translate a source +/// position in the current buffer into a SourceLocation object for rendering. +void Lexer::Diag(const char *Loc, unsigned DiagID, + const std::string &Msg) const { + if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID)) + return; + PP->Diag(getSourceLocation(Loc), DiagID, Msg); +} +void Lexer::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) const { + if (LexingRawMode && Diagnostic::isBuiltinNoteWarningOrExtension(DiagID)) + return; + PP->Diag(Loc, DiagID, Msg); +} + + +//===----------------------------------------------------------------------===// +// Trigraph and Escaped Newline Handling Code. +//===----------------------------------------------------------------------===// + +/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, +/// return the decoded trigraph letter it corresponds to, or '\0' if nothing. +static char GetTrigraphCharForLetter(char Letter) { + switch (Letter) { + default: return 0; + case '=': return '#'; + case ')': return ']'; + case '(': return '['; + case '!': return '|'; + case '\'': return '^'; + case '>': return '}'; + case '/': return '\\'; + case '<': return '{'; + case '-': return '~'; + } +} + +/// DecodeTrigraphChar - If the specified character is a legal trigraph when +/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled, +/// return the result character. Finally, emit a warning about trigraph use +/// whether trigraphs are enabled or not. 
+static char DecodeTrigraphChar(const char *CP, Lexer *L) { + char Res = GetTrigraphCharForLetter(*CP); + if (Res && L) { + if (!L->getFeatures().Trigraphs) { + L->Diag(CP-2, diag::trigraph_ignored); + return 0; + } else { + L->Diag(CP-2, diag::trigraph_converted, std::string()+Res); + } + } + return Res; +} + +/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, +/// get its size, and return it. This is tricky in several cases: +/// 1. If currently at the start of a trigraph, we warn about the trigraph, +/// then either return the trigraph (skipping 3 chars) or the '?', +/// depending on whether trigraphs are enabled or not. +/// 2. If this is an escaped newline (potentially with whitespace between +/// the backslash and newline), implicitly skip the newline and return +/// the char after it. +/// 3. If this is a UCN, return it. FIXME: C++ UCN's? +/// +/// This handles the slow/uncommon case of the getCharAndSize method. Here we +/// know that we can accumulate into Size, and that we have already incremented +/// Ptr by Size bytes. +/// +/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should +/// be updated to match. +/// +char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, + Token *Tok) { + // If we have a slash, look for an escaped newline. + if (Ptr[0] == '\\') { + ++Size; + ++Ptr; +Slash: + // Common case, backslash-char where the char is not whitespace. + if (!isWhitespace(Ptr[0])) return '\\'; + + // See if we have optional whitespace characters followed by a newline. + { + unsigned SizeTmp = 0; + do { + ++SizeTmp; + if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') { + // Remember that this token needs to be cleaned. + if (Tok) Tok->setFlag(Token::NeedsCleaning); + + // Warn if there was whitespace between the backslash and newline. + if (SizeTmp != 1 && Tok) + Diag(Ptr, diag::backslash_newline_space); + + // If this is a \r\n or \n\r, skip the newlines. 
+ if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') && + Ptr[SizeTmp-1] != Ptr[SizeTmp]) + ++SizeTmp; + + // Found backslash<whitespace><newline>. Parse the char after it. + Size += SizeTmp; + Ptr += SizeTmp; + // Use slow version to accumulate a correct size field. + return getCharAndSizeSlow(Ptr, Size, Tok); + } + } while (isWhitespace(Ptr[SizeTmp])); + } + + // Otherwise, this is not an escaped newline, just return the slash. + return '\\'; + } + + // If this is a trigraph, process it. + if (Ptr[0] == '?' && Ptr[1] == '?') { + // If this is actually a legal trigraph (not something like "??x"), emit + // a trigraph warning. If so, and if trigraphs are enabled, return it. + if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) { + // Remember that this token needs to be cleaned. + if (Tok) Tok->setFlag(Token::NeedsCleaning); + + Ptr += 3; + Size += 3; + if (C == '\\') goto Slash; + return C; + } + } + + // If this is neither, return a single character. + ++Size; + return *Ptr; +} + + +/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the +/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, +/// and that we have already incremented Ptr by Size bytes. +/// +/// NOTE: When this method is updated, getCharAndSizeSlow (above) should +/// be updated to match. +char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features) { + // If we have a slash, look for an escaped newline. + if (Ptr[0] == '\\') { + ++Size; + ++Ptr; +Slash: + // Common case, backslash-char where the char is not whitespace. + if (!isWhitespace(Ptr[0])) return '\\'; + + // See if we have optional whitespace characters followed by a newline. + { + unsigned SizeTmp = 0; + do { + ++SizeTmp; + if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') { + + // If this is a \r\n or \n\r, skip the newlines. 
+ if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') && + Ptr[SizeTmp-1] != Ptr[SizeTmp]) + ++SizeTmp; + + // Found backslash<whitespace><newline>. Parse the char after it. + Size += SizeTmp; + Ptr += SizeTmp; + + // Use slow version to accumulate a correct size field. + return getCharAndSizeSlowNoWarn(Ptr, Size, Features); + } + } while (isWhitespace(Ptr[SizeTmp])); + } + + // Otherwise, this is not an escaped newline, just return the slash. + return '\\'; + } + + // If this is a trigraph, process it. + if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { + // If this is actually a legal trigraph (not something like "??x"), return + // it. + if (char C = GetTrigraphCharForLetter(Ptr[2])) { + Ptr += 3; + Size += 3; + if (C == '\\') goto Slash; + return C; + } + } + + // If this is neither, return a single character. + ++Size; + return *Ptr; +} + +//===----------------------------------------------------------------------===// +// Helper methods for lexing. +//===----------------------------------------------------------------------===// + +void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { + // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] + unsigned Size; + unsigned char C = *CurPtr++; + while (isIdentifierBody(C)) { + C = *CurPtr++; + } + --CurPtr; // Back up over the skipped character. + + // Fast path, no $,\,? in identifier found. '\' might be an escaped newline + // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN. + // FIXME: UCNs. + if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) { +FinishIdentifier: + const char *IdStart = BufferPtr; + FormTokenWithChars(Result, CurPtr); + Result.setKind(tok::identifier); + + // If we are in raw mode, return this identifier raw. There is no need to + // look up identifier information or attempt to macro expand it. + if (LexingRawMode) return; + + // Fill in Result.IdentifierInfo, looking up the identifier in the + // identifier table. 
+ PP->LookUpIdentifierInfo(Result, IdStart); + + // Finally, now that we know we have an identifier, pass this off to the + // preprocessor, which may macro expand it or something. + return PP->HandleIdentifier(Result); + } + + // Otherwise, $,\,? in identifier found.  Enter slower path. + + C = getCharAndSize(CurPtr, Size); + while (1) { + if (C == '$') { + // If we hit a $ and they are not supported in identifiers, we are done. + if (!Features.DollarIdents) goto FinishIdentifier; + + // Otherwise, emit a diagnostic and continue. + Diag(CurPtr, diag::ext_dollar_in_identifier); + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + continue; + } else if (!isIdentifierBody(C)) { // FIXME: UCNs. + // Found end of identifier. + goto FinishIdentifier; + } + + // Otherwise, this character is good, consume it. + CurPtr = ConsumeChar(CurPtr, Size, Result); + + C = getCharAndSize(CurPtr, Size); + while (isIdentifierBody(C)) { // FIXME: UCNs. + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + } + } +} + + +/// LexNumericConstant - Lex the remainder of an integer or floating point +/// constant.  CurPtr points just past the characters already lexed.  The +/// finished token is stored in Result. +void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { + unsigned Size; + char C = getCharAndSize(CurPtr, Size); + char PrevCh = 0; + while (isNumberBody(C)) { // FIXME: UCNs? + CurPtr = ConsumeChar(CurPtr, Size, Result); + PrevCh = C; + C = getCharAndSize(CurPtr, Size); + } + + // If we fell out, check for a sign, due to 1e+12.  If we have one, continue. + if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) + return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + + // If we have a hex FP constant, continue.
+ if (Features.HexFloats && + (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) + return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + + Result.setKind(tok::numeric_constant); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + +/// LexStringLiteral - Lex the remainder of a string literal, after having lexed +/// either " or L". +void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide){ + const char *NulCharacter = 0; // Does this string contain the \0 character? + + char C = getAndAdvanceChar(CurPtr, Result); + while (C != '"') { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } + + // If a nul character existed in the string, warn about it. + if (NulCharacter) Diag(NulCharacter, diag::null_in_string); + + Result.setKind(Wide ? tok::wide_string_literal : tok::string_literal); + + // Update the location of the token as well as the BufferPtr instance var. + FormTokenWithChars(Result, CurPtr); +} + +/// LexAngledStringLiteral - Lex the remainder of an angled string literal, +/// after having lexed the '<' character. This is used for #include filenames. +void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { + const char *NulCharacter = 0; // Does this string contain the \0 character? + + char C = getAndAdvanceChar(CurPtr, Result); + while (C != '>') { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. 
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } + + // If a nul character existed in the string, warn about it. + if (NulCharacter) Diag(NulCharacter, diag::null_in_string); + + Result.setKind(tok::angle_string_literal); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + + +/// LexCharConstant - Lex the remainder of a character constant, after having +/// lexed either ' or L'. +void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { + const char *NulCharacter = 0; // Does this character contain the \0 character? + + // Handle the common case of 'x' and '\y' efficiently. + char C = getAndAdvanceChar(CurPtr, Result); + if (C == '\'') { + if (!LexingRawMode) Diag(BufferPtr, diag::err_empty_character); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr); + return; + } else if (C == '\\') { + // Skip the escaped character. + // FIXME: UCN's. + C = getAndAdvanceChar(CurPtr, Result); + } + + if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') { + ++CurPtr; + } else { + // Fall back on generic code for embedded nulls, newlines, wide chars. + do { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_char); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } while (C != '\''); + } + + if (NulCharacter) Diag(NulCharacter, diag::null_in_char); + + Result.setKind(tok::char_constant); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + +/// SkipWhitespace - Efficiently skip over a series of whitespace characters. +/// Update BufferPtr to point to the next non-whitespace character and return. +void Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { + // Whitespace - Skip it, then return the token after the whitespace. + unsigned char Char = *CurPtr; // Skip consecutive spaces efficiently. + while (1) { + // Skip horizontal whitespace very aggressively. + while (isHorizontalWhitespace(Char)) + Char = *++CurPtr; + + // Otherwise if we have something other than whitespace, we're done. + if (Char != '\n' && Char != '\r') + break; + + if (ParsingPreprocessorDirective) { + // End of preprocessor directive line, let LexTokenInternal handle this. + BufferPtr = CurPtr; + return; + } + + // ok, but handle newline. + // The returned token is at the start of the line. + Result.setFlag(Token::StartOfLine); + // No leading whitespace seen so far. + Result.clearFlag(Token::LeadingSpace); + Char = *++CurPtr; + } + + // If this isn't immediately after a newline, there is leading space. + char PrevChar = CurPtr[-1]; + if (PrevChar != '\n' && PrevChar != '\r') + Result.setFlag(Token::LeadingSpace); + + BufferPtr = CurPtr; +} + +/// SkipBCPLComment - We have just read the // characters from input.  Skip until +/// we find the newline character that terminates the comment.  Then update +/// BufferPtr and return.
+bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { + // If BCPL comments aren't explicitly enabled for this language, emit an + // extension warning. + if (!Features.BCPLComment) { + Diag(BufferPtr, diag::ext_bcpl_comment); + + // Mark them enabled so we only emit one warning for this translation + // unit. + Features.BCPLComment = true; + } + + // Scan over the body of the comment. The common case, when scanning, is that + // the comment contains normal ascii characters with nothing interesting in + // them. As such, optimize for this case with the inner loop. + char C; + do { + C = *CurPtr; + // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character. + // If we find a \n character, scan backwards, checking to see if it's an + // escaped newline, like we do for block comments. + + // Skip over characters in the fast loop. + while (C != 0 && // Potentially EOF. + C != '\\' && // Potentially escaped newline. + C != '?' && // Potentially trigraph. + C != '\n' && C != '\r') // Newline or DOS-style newline. + C = *++CurPtr; + + // If this is a newline, we're done. + if (C == '\n' || C == '\r') + break; // Found the newline? Break out! + + // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to + // properly decode the character. + const char *OldPtr = CurPtr; + C = getAndAdvanceChar(CurPtr, Result); + + // If we read multiple characters, and one of those characters was a \r or + // \n, then we had an escaped newline within the comment. Emit diagnostic + // unless the next line is also a // comment. + if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') { + for (; OldPtr != CurPtr; ++OldPtr) + if (OldPtr[0] == '\n' || OldPtr[0] == '\r') { + // Okay, we found a // comment that ends in a newline, if the next + // line is also a // comment, but has spaces, don't emit a diagnostic. + if (isspace(C)) { + const char *ForwardPtr = CurPtr; + while (isspace(*ForwardPtr)) // Skip whitespace. 
+ ++ForwardPtr; + if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') + break; + } + + Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment); + break; + } + } + + if (CurPtr == BufferEnd+1) { --CurPtr; break; } + } while (C != '\n' && C != '\r'); + + // Found but did not consume the newline. + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) + return SaveBCPLComment(Result, CurPtr); + + // If we are inside a preprocessor directive and we see the end of line, + // return immediately, so that the lexer can return this as an EOM token. + if (ParsingPreprocessorDirective || CurPtr == BufferEnd) { + BufferPtr = CurPtr; + return true; + } + + // Otherwise, eat the \n character. We don't care if this is a \n\r or + // \r\n sequence. + ++CurPtr; + + // The next returned token is at the start of the line. + Result.setFlag(Token::StartOfLine); + // No leading whitespace seen so far. + Result.clearFlag(Token::LeadingSpace); + BufferPtr = CurPtr; + return true; +} + +/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in +/// an appropriate way and return it. +bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { + Result.setKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + + // If this BCPL-style comment is in a macro definition, transmogrify it into + // a C-style block comment. + if (ParsingPreprocessorDirective) { + std::string Spelling = PP->getSpelling(Result); + assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); + Spelling[1] = '*'; // Change prefix to "/*". + Spelling += "*/"; // add suffix. + + Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(), + Result.getLocation())); + Result.setLength(Spelling.size()); + } + return false; +} + +/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline +/// character (either \n or \r) is part of an escaped newline sequence. Issue a +/// diagnostic if so. 
We know that the newline is inside of a block comment. +/// Returns true only if a '*' immediately precedes the backslash (or ??/ +/// trigraph) of that escaped newline. +static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, + Lexer *L) { + assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); + + // Back up off the newline. + --CurPtr; + + // If this is a two-character newline sequence, skip the other character. + if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { + // \n\n or \r\r -> not escaped newline. + if (CurPtr[0] == CurPtr[1]) + return false; + // \n\r or \r\n -> skip the newline. + --CurPtr; + } + + // If we have horizontal whitespace, skip over it.  We allow whitespace + // between the slash and newline. + bool HasSpace = false; + while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { + --CurPtr; + HasSpace = true; + } + + // If we have a backslash, this is an escaped newline; it only ends the + // comment if a '*' comes right before it. + if (*CurPtr == '\\') { + if (CurPtr[-1] != '*') return false; + } else { + // It isn't a backslash; is it the ??/ trigraph preceded by a '*'? + if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || + CurPtr[-3] != '*') + return false; + + // This is the trigraph ending the comment.  Emit a stern warning! + CurPtr -= 2; + + // If no trigraphs are enabled, warn that we ignored this trigraph and + // ignore this * character. + if (!L->getFeatures().Trigraphs) { + L->Diag(CurPtr, diag::trigraph_ignored_block_comment); + return false; + } + L->Diag(CurPtr, diag::trigraph_ends_block_comment); + } + + // Warn about having an escaped newline between the */ characters. + L->Diag(CurPtr, diag::escaped_newline_block_comment_end); + + // If there was space between the backslash and newline, warn about it. + if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space); + + return true; +} + +#ifdef __SSE2__ +#include <emmintrin.h> +#elif __ALTIVEC__ +#include <altivec.h> +#undef bool +#endif + +/// SkipBlockComment - We have just read the /* characters from input.  Read +/// until we find the */ characters that terminate the comment.
Note that we +/// don't bother decoding trigraphs or escaped newlines in block comments, +/// because they cannot cause the comment to end.  The only thing that can +/// happen is the comment could end with an escaped newline between the */ end +/// of comment. +bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { + // Scan one character past where we should, looking for a '/' character.  Once + // we find it, check to see if it was preceded by a *.  This common + // optimization helps people who like to put a lot of * characters in their + // comments. + + // The first character we get with newlines and trigraphs skipped to handle + // the degenerate /*/ case below correctly if the * has an escaped newline + // after it. + unsigned CharSize; + unsigned char C = getCharAndSize(CurPtr, CharSize); + CurPtr += CharSize; + if (C == 0 && CurPtr == BufferEnd+1) { + Diag(BufferPtr, diag::err_unterminated_block_comment); + BufferPtr = CurPtr-1; + return true; + } + + // Check to see if the first character after the '/*' is another /.  If so, + // then this slash does not end the block comment, it is part of it. + if (C == '/') + C = *CurPtr++; + + while (1) { + // Skip over all non-interesting characters until we find end of buffer or a + // (probably ending) '/' character. + if (CurPtr + 24 < BufferEnd) { + // While not aligned to a 16-byte boundary.
+ while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) + C = *CurPtr++; + + if (C == '/') goto FoundSlash; + +#ifdef __SSE2__ + __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/'); + while (CurPtr+16 <= BufferEnd && + _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0) + CurPtr += 16; +#elif __ALTIVEC__ + __vector unsigned char Slashes = { + '/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/' + }; + while (CurPtr+16 <= BufferEnd && + !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes)) + CurPtr += 16; +#else + // Scan for '/' quickly. Many block comments are very large. + while (CurPtr[0] != '/' && + CurPtr[1] != '/' && + CurPtr[2] != '/' && + CurPtr[3] != '/' && + CurPtr+4 < BufferEnd) { + CurPtr += 4; + } +#endif + + // It has to be one of the bytes scanned, increment to it and read one. + C = *CurPtr++; + } + + // Loop to scan the remainder. + while (C != '/' && C != '\0') + C = *CurPtr++; + + FoundSlash: + if (C == '/') { + if (CurPtr[-2] == '*') // We found the final */. We're done! + break; + + if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { + if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { + // We found the final */, though it had an escaped newline between the + // * and /. We're done! + break; + } + } + if (CurPtr[0] == '*' && CurPtr[1] != '/') { + // If this is a /* inside of the comment, emit a warning. Don't do this + // if this is a /*/, which will end the comment. This misses cases with + // embedded escaped newlines, but oh well. + Diag(CurPtr-1, diag::nested_block_comment); + } + } else if (C == 0 && CurPtr == BufferEnd+1) { + Diag(BufferPtr, diag::err_unterminated_block_comment); + // Note: the user probably forgot a */. We could continue immediately + // after the /*, but this would involve lexing a lot of what really is the + // comment, which surely would confuse the parser. 
+ BufferPtr = CurPtr-1; + return true; + } + C = *CurPtr++; + } + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) { + Result.setKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + return false; + } + + // It is common for the tokens immediately after a /**/ comment to be + // whitespace. Instead of going through the big switch, handle it + // efficiently now. + if (isHorizontalWhitespace(*CurPtr)) { + Result.setFlag(Token::LeadingSpace); + SkipWhitespace(Result, CurPtr+1); + return true; + } + + // Otherwise, just return so that the next character will be lexed as a token. + BufferPtr = CurPtr; + Result.setFlag(Token::LeadingSpace); + return true; +} + +//===----------------------------------------------------------------------===// +// Primary Lexing Entry Points +//===----------------------------------------------------------------------===// + +/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and +/// (potentially) macro expand the filename. +void Lexer::LexIncludeFilename(Token &FilenameTok) { + assert(ParsingPreprocessorDirective && + ParsingFilename == false && + "Must be in a preprocessing directive!"); + + // We are now parsing a filename! + ParsingFilename = true; + + // Lex the filename. + Lex(FilenameTok); + + // We should have obtained the filename now. + ParsingFilename = false; + + // No filename? + if (FilenameTok.is(tok::eom)) + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); +} + +/// ReadToEndOfLine - Read the rest of the current preprocessor line as an +/// uninterpreted string. This switches the lexer out of directive mode. +std::string Lexer::ReadToEndOfLine() { + assert(ParsingPreprocessorDirective && ParsingFilename == false && + "Must be in a preprocessing directive!"); + std::string Result; + Token Tmp; + + // CurPtr - Cache BufferPtr in an automatic variable. 
+ const char *CurPtr = BufferPtr; + while (1) { + char Char = getAndAdvanceChar(CurPtr, Tmp); + switch (Char) { + default: + Result += Char; + break; + case 0: // Null. + // Found end of file? + if (CurPtr-1 != BufferEnd) { + // Nope, normal character, continue. + Result += Char; + break; + } + // FALL THROUGH. + case '\r': + case '\n': + // Okay, we found the end of the line. First, back up past the \0, \r, \n. + assert(CurPtr[-1] == Char && "Trigraphs for newline?"); + BufferPtr = CurPtr-1; + + // Next, lex the character, which should handle the EOM transition. + Lex(Tmp); + assert(Tmp.is(tok::eom) && "Unexpected token!"); + + // Finally, we're done, return the string we found. + return Result; + } + } +} + +/// LexEndOfFile - CurPtr points to the end of this file. Handle this +/// condition, reporting diagnostics and handling other edge cases as required. +/// This returns true if Result contains a token, false if PP.Lex should be +/// called again. +bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { + // If we hit the end of the file while parsing a preprocessor directive, + // end the preprocessor directive first. The next token returned will + // then be the end of file. + if (ParsingPreprocessorDirective) { + // Done parsing the "line". + ParsingPreprocessorDirective = false; + Result.setKind(tok::eom); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = PP->getCommentRetentionState(); + return true; // Have a token. + } + + // If we are in raw mode, return this event as an EOF token. Let the caller + // that put us in raw mode handle the event. + if (LexingRawMode) { + Result.startToken(); + BufferPtr = BufferEnd; + FormTokenWithChars(Result, BufferEnd); + Result.setKind(tok::eof); + return true; + } + + // Otherwise, issue diagnostics for unterminated #if and missing newline. 
+ + // If we are in a #if directive, emit an error. + while (!ConditionalStack.empty()) { + Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); + ConditionalStack.pop_back(); + } + + // If the file was empty or didn't end in a newline, issue a pedwarn. + // NOTE(review): for an empty buffer CurPtr[-1] reads the byte before the + // start of the buffer -- confirm that byte is always valid to read. + if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + Diag(BufferEnd, diag::ext_no_newline_eof); + + BufferPtr = CurPtr; + + // Finally, let the preprocessor handle this. + return PP->HandleEndOfFile(Result); +} + +/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from +/// the specified lexer will return a tok::l_paren token, 0 if it is something +/// else and 2 if there are no more tokens in the buffer controlled by the +/// lexer. +unsigned Lexer::isNextPPTokenLParen() { + assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + + // Switch to 'skipping' mode.  This will ensure that we can lex a token + // without emitting diagnostics, disables macro expansion, and will cause EOF + // to return an EOF token instead of popping the include stack. + LexingRawMode = true; + + // Save state that can be changed while lexing so that we can restore it. + const char *TmpBufferPtr = BufferPtr; + + Token Tok; + Tok.startToken(); + LexTokenInternal(Tok); + + // Restore state that may have changed. + BufferPtr = TmpBufferPtr; + + // Restore the lexer back to non-skipping mode. + LexingRawMode = false; + + if (Tok.is(tok::eof)) + return 2; + return Tok.is(tok::l_paren); +} + + +/// LexTokenInternal - This implements a simple C family lexer.  It is an +/// extremely performance critical piece of code.  This assumes that the buffer +/// has a null character at the end of the file.  The lexed token is stored in +/// Result; nothing is returned.  This produces a +/// preprocessing token, not a normal token, as such, it is an internal +/// interface.  It assumes that the Flags of result have been cleared before +/// calling this.
+void Lexer::LexTokenInternal(Token &Result) { +LexNextToken: + // New token, can't need cleaning yet. + Result.clearFlag(Token::NeedsCleaning); + Result.setIdentifierInfo(0); + + // CurPtr - Cache BufferPtr in an automatic variable. + const char *CurPtr = BufferPtr; + + // Small amounts of horizontal whitespace is very common between tokens. + if ((*CurPtr == ' ') || (*CurPtr == '\t')) { + ++CurPtr; + while ((*CurPtr == ' ') || (*CurPtr == '\t')) + ++CurPtr; + BufferPtr = CurPtr; + Result.setFlag(Token::LeadingSpace); + } + + unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. + + // Read a character, advancing over it. + char Char = getAndAdvanceChar(CurPtr, Result); + switch (Char) { + case 0: // Null. + // Found end of file? + if (CurPtr-1 == BufferEnd) { + // Read the PP instance variable into an automatic variable, because + // LexEndOfFile will often delete 'this'. + Preprocessor *PPCache = PP; + if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. + return; // Got a token to return. + assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); + return PPCache->Lex(Result); + } + + Diag(CurPtr-1, diag::null_in_file); + Result.setFlag(Token::LeadingSpace); + SkipWhitespace(Result, CurPtr); + goto LexNextToken; // GCC isn't tail call eliminating. + case '\n': + case '\r': + // If we are inside a preprocessor directive and we see the end of line, + // we know we are done with the directive, so return an EOM token. + if (ParsingPreprocessorDirective) { + // Done parsing the "line". + ParsingPreprocessorDirective = false; + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = PP->getCommentRetentionState(); + + // Since we consumed a newline, we are back at the start of a line. + IsAtStartOfLine = true; + + Result.setKind(tok::eom); + break; + } + // The returned token is at the start of the line. + Result.setFlag(Token::StartOfLine); + // No leading whitespace seen so far. 
+ Result.clearFlag(Token::LeadingSpace); + SkipWhitespace(Result, CurPtr); + goto LexNextToken; // GCC isn't tail call eliminating. + case ' ': + case '\t': + case '\f': + case '\v': + SkipHorizontalWhitespace: + Result.setFlag(Token::LeadingSpace); + SkipWhitespace(Result, CurPtr); + + SkipIgnoredUnits: + CurPtr = BufferPtr; + + // If the next token is obviously a // or /* */ comment, skip it efficiently + // too (without going through the big switch stmt). + if (CurPtr[0] == '/' && CurPtr[1] == '/' && !KeepCommentMode) { + SkipBCPLComment(Result, CurPtr+2); + goto SkipIgnoredUnits; + } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !KeepCommentMode) { + SkipBlockComment(Result, CurPtr+2); + goto SkipIgnoredUnits; + } else if (isHorizontalWhitespace(*CurPtr)) { + goto SkipHorizontalWhitespace; + } + goto LexNextToken; // GCC isn't tail call eliminating. + + // C99 6.4.4.1: Integer Constants. + // C99 6.4.4.2: Floating Constants. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexNumericConstant(Result, CurPtr); + + case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + Char = getCharAndSize(CurPtr, SizeTmp); + + // Wide string literal. + if (Char == '"') + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + true); + + // Wide character constant. + if (Char == '\'') + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + // FALL THROUGH, treating L like the start of an identifier. + + // C99 6.4.2: Identifiers. 
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '_': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + + case '$': // $ in identifiers. + if (Features.DollarIdents) { + Diag(CurPtr-1, diag::ext_dollar_in_identifier); + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + } + + Result.setKind(tok::unknown); + break; + + // C99 6.4.4: Character Constants. + case '\'': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexCharConstant(Result, CurPtr); + + // C99 6.4.5: String Literals. + case '"': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexStringLiteral(Result, CurPtr, false); + + // C99 6.4.6: Punctuators. + case '?': + Result.setKind(tok::question); + break; + case '[': + Result.setKind(tok::l_square); + break; + case ']': + Result.setKind(tok::r_square); + break; + case '(': + Result.setKind(tok::l_paren); + break; + case ')': + Result.setKind(tok::r_paren); + break; + case '{': + Result.setKind(tok::l_brace); + break; + case '}': + Result.setKind(tok::r_brace); + break; + case '.': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char >= '0' && Char <= '9') { + // Notify MIOpt that we read a non-whitespace/non-comment token. 
+ MIOpt.ReadToken(); + + return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + } else if (Features.CPlusPlus && Char == '*') { + Result.setKind(tok::periodstar); + CurPtr += SizeTmp; + } else if (Char == '.' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { + Result.setKind(tok::ellipsis); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else { + Result.setKind(tok::period); + } + break; + case '&': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '&') { + Result.setKind(tok::ampamp); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::ampequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::amp); + } + break; + case '*': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::starequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::star); + } + break; + case '+': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '+') { + Result.setKind(tok::plusplus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::plusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::plus); + } + break; + case '-': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '-') { + Result.setKind(tok::minusminus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && Features.CPlusPlus && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { + Result.setKind(tok::arrowstar); // C++ ->* + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::arrow); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::minusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::minus); + } + break; + case '~': + Result.setKind(tok::tilde); + 
break; + case '!': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::exclaimequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::exclaim); + } + break; + case '/': + // 6.4.9: Comments + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '/') { // BCPL comment. + if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) { + // It is common for the tokens immediately after a // comment to be + // whitespace (indentation for the next line). Instead of going through + // the big switch, handle it efficiently now. + goto SkipIgnoredUnits; + } + return; // KeepCommentMode + } else if (Char == '*') { // /**/ comment. + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. + return; // KeepCommentMode + } else if (Char == '=') { + Result.setKind(tok::slashequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::slash); + } + break; + case '%': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::percentequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_brace); // '%>' -> '}' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { + Result.setKind(tok::hashhash); // '%:%:' -> '##' + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize + Result.setKind(tok::hashat); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Diag(BufferPtr, diag::charize_microsoft_ext); + } else { + Result.setKind(tok::hash); // '%:' -> '#' + + // We parsed a # character. 
If this occurs at the start of the line, + // it's actually the start of a preprocessing directive.  Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? + if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP->HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP->isCurrentLexer(this)) { + // Start a new token.  If this is a #include or something, the PP may + // want us starting at the beginning of the line again.  If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + + return PP->Lex(Result); + } + } + } else { + Result.setKind(tok::percent); + } + break; + case '<': + Char = getCharAndSize(CurPtr, SizeTmp); + if (ParsingFilename) { + // Hand over the position right after the '<'.  Skipping the first + // character here would let it bypass the escape, nul and '>' handling + // in LexAngledStringLiteral, so an empty "<>" lost its closing '>'. + return LexAngledStringLiteral(Result, CurPtr); + } else if (Char == '<' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { + Result.setKind(tok::lesslessequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '<') { + Result.setKind(tok::lessless); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::lessequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + Result.setKind(tok::l_square); // '<:' -> '[' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '%') { + Result.setKind(tok::l_brace); // '<%' -> '{' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::less); + } + break; + case '>': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::greaterequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { +
Result.setKind(tok::greatergreaterequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::greatergreater); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::greater); + } + break; + case '^': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::caretequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::caret); + } + break; + case '|': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::pipeequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '|') { + Result.setKind(tok::pipepipe); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::pipe); + } + break; + case ':': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_square); // ':>' -> ']' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.CPlusPlus && Char == ':') { + Result.setKind(tok::coloncolon); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::colon); + } + break; + case ';': + Result.setKind(tok::semi); + break; + case '=': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::equalequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::equal); + } + break; + case ',': + Result.setKind(tok::comma); + break; + case '#': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '#') { + Result.setKind(tok::hashhash); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize + Result.setKind(tok::hashat); + Diag(BufferPtr, diag::charize_microsoft_ext); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::hash); + // We parsed a # character. 
If this occurs at the start of the line, + // it's actually the start of a preprocessing directive. Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? + if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP->HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP->isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + return PP->Lex(Result); + } + } + break; + + case '@': + // Objective C support. + if (CurPtr[-1] == '@' && Features.ObjC1) + Result.setKind(tok::at); + else + Result.setKind(tok::unknown); + break; + + case '\\': + // FIXME: UCN's. + // FALL THROUGH. + default: + Result.setKind(tok::unknown); + break; + } + + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp new file mode 100644 index 00000000000..aa0b831af90 --- /dev/null +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -0,0 +1,691 @@ +//===--- LiteralSupport.cpp - Code to parse and process literals ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NumericLiteralParser, CharLiteralParser, and +// StringLiteralParser interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's +/// not valid. +static int HexDigitValue(char C) { + if (C >= '0' && C <= '9') return C-'0'; + if (C >= 'a' && C <= 'f') return C-'a'+10; + if (C >= 'A' && C <= 'F') return C-'A'+10; + return -1; +} + +/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in +/// either a character or a string literal. +static unsigned ProcessCharEscape(const char *&ThisTokBuf, + const char *ThisTokEnd, bool &HadError, + SourceLocation Loc, bool IsWide, + Preprocessor &PP) { + // Skip the '\' char. + ++ThisTokBuf; + + // We know that this character can't be off the end of the buffer, because + // that would have been \", which would not have been the end of string. + unsigned ResultChar = *ThisTokBuf++; + switch (ResultChar) { + // These map to themselves. + case '\\': case '\'': case '"': case '?': break; + + // These have fixed mappings. + case 'a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + ResultChar = 7; + break; + case 'b': + ResultChar = 8; + break; + case 'e': + PP.Diag(Loc, diag::ext_nonstandard_escape, "e"); + ResultChar = 27; + break; + case 'f': + ResultChar = 12; + break; + case 'n': + ResultChar = 10; + break; + case 'r': + ResultChar = 13; + break; + case 't': + ResultChar = 9; + break; + case 'v': + ResultChar = 11; + break; + + //case 'u': case 'U': // FIXME: UCNs. + case 'x': { // Hex escape. + ResultChar = 0; + if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { + PP.Diag(Loc, diag::err_hex_escape_no_digits); + HadError = 1; + break; + } + + // Hex escapes are a maximal series of hex digits. 
+ bool Overflow = false; + for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { + int CharVal = HexDigitValue(ThisTokBuf[0]); + if (CharVal == -1) break; + Overflow |= (ResultChar & 0xF0000000) ? true : false; // About to shift out a digit? + ResultChar <<= 4; + ResultChar |= CharVal; + } + + // See if any bits will be truncated when evaluated as a character. + unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide); + + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { + Overflow = true; + ResultChar &= ~0U >> (32-CharWidth); + } + + // Check for overflow. + if (Overflow) // Too many digits to fit in + PP.Diag(Loc, diag::warn_hex_escape_too_large); + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + // Octal escapes. + --ThisTokBuf; + ResultChar = 0; + + // Octal escapes are a series of octal digits with maximum length 3. + // "\0123" is a two digit sequence equal to "\012" "3". + unsigned NumDigits = 0; + do { + ResultChar <<= 3; + ResultChar |= *ThisTokBuf++ - '0'; + ++NumDigits; + } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 && + ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7'); + + // Check for overflow. Reject '\777', but not L'\777'. + unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide); + + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { + PP.Diag(Loc, diag::warn_octal_escape_too_large); + ResultChar &= ~0U >> (32-CharWidth); + } + break; + } + + // Otherwise, these are not valid escapes. + case '(': case '{': case '[': case '%': + // GCC accepts these as extensions. We warn about them as such though. + if (!PP.getLangOptions().NoExtensions) { + PP.Diag(Loc, diag::ext_nonstandard_escape, + std::string()+(char)ResultChar); + break; + } + // FALL THROUGH. 
+ default: + if (isgraph(ThisTokBuf[0])) { + PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar); + } else { + PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar)); + } + break; + } + + return ResultChar; +} + + + + +/// integer-constant: [C99 6.4.4.1] +/// decimal-constant integer-suffix +/// octal-constant integer-suffix +/// hexadecimal-constant integer-suffix +/// decimal-constant: +/// nonzero-digit +/// decimal-constant digit +/// octal-constant: +/// 0 +/// octal-constant octal-digit +/// hexadecimal-constant: +/// hexadecimal-prefix hexadecimal-digit +/// hexadecimal-constant hexadecimal-digit +/// hexadecimal-prefix: one of +/// 0x 0X +/// integer-suffix: +/// unsigned-suffix [long-suffix] +/// unsigned-suffix [long-long-suffix] +/// long-suffix [unsigned-suffix] +/// long-long-suffix [unsigned-sufix] +/// nonzero-digit: +/// 1 2 3 4 5 6 7 8 9 +/// octal-digit: +/// 0 1 2 3 4 5 6 7 +/// hexadecimal-digit: +/// 0 1 2 3 4 5 6 7 8 9 +/// a b c d e f +/// A B C D E F +/// unsigned-suffix: one of +/// u U +/// long-suffix: one of +/// l L +/// long-long-suffix: one of +/// ll LL +/// +/// floating-constant: [C99 6.4.4.2] +/// TODO: add rules... +/// + +NumericLiteralParser:: +NumericLiteralParser(const char *begin, const char *end, + SourceLocation TokLoc, Preprocessor &pp) + : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) { + s = DigitsBegin = begin; + saw_exponent = false; + saw_period = false; + isLong = false; + isUnsigned = false; + isLongLong = false; + isFloat = false; + isImaginary = false; + hadError = false; + + if (*s == '0') { // parse radix + s++; + if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) { + s++; + radix = 16; + DigitsBegin = s; + s = SkipHexDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (*s == '.') { + s++; + saw_period = true; + s = SkipHexDigits(s); + } + // A binary exponent can appear with or with a '.'. If dotted, the + // binary exponent is required. 
+ if ((*s == 'p' || *s == 'P') && PP.getLangOptions().HexFloats) { + s++; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } else if (saw_period) { + Diag(TokLoc, diag::err_hexconstant_requires_exponent); + return; + } + } else if (*s == 'b' || *s == 'B') { + // 0b101010 is a GCC extension. + ++s; + radix = 2; + DigitsBegin = s; + s = SkipBinaryDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (isxdigit(*s)) { + Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1)); + return; + } + PP.Diag(TokLoc, diag::ext_binary_literal); + } else { + // For now, the radix is set to 8. If we discover that we have a + // floating point constant, the radix will change to 10. Octal floating + // point constants are not permitted (only decimal and hexadecimal). + radix = 8; + DigitsBegin = s; + s = SkipOctalDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { + TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin); + Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1)); + return; + } else if (*s == '.') { + s++; + radix = 10; + saw_period = true; + s = SkipDigits(s); + } + if (*s == 'e' || *s == 'E') { // exponent + s++; + radix = 10; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } + } + } else { // the first digit is non-zero + radix = 10; + s = SkipDigits(s); + if (s == ThisTokEnd) { + // Done. 
+ } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { + Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1)); + return; + } else if (*s == '.') { + s++; + saw_period = true; + s = SkipDigits(s); + } + if (*s == 'e' || *s == 'E') { // exponent + s++; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } + } + + SuffixBegin = s; + + // Parse the suffix. At this point we can classify whether we have an FP or + // integer constant. + bool isFPConstant = isFloatingLiteral(); + + // Loop over all of the characters of the suffix. If we see something bad, + // we break out of the loop. + for (; s != ThisTokEnd; ++s) { + switch (*s) { + case 'f': // FP Suffix for "float" + case 'F': + if (!isFPConstant) break; // Error for integer constant. + if (isFloat || isLong) break; // FF, LF invalid. + isFloat = true; + continue; // Success. + case 'u': + case 'U': + if (isFPConstant) break; // Error for floating constant. + if (isUnsigned) break; // Cannot be repeated. + isUnsigned = true; + continue; // Success. + case 'l': + case 'L': + if (isLong || isLongLong) break; // Cannot be repeated. + if (isFloat) break; // LF invalid. + + // Check for long long. The L's need to be adjacent and the same case. + if (s+1 != ThisTokEnd && s[1] == s[0]) { + if (isFPConstant) break; // long long invalid for floats. + isLongLong = true; + ++s; // Eat both of them. + } else { + isLong = true; + } + continue; // Success. + case 'i': + case 'I': + case 'j': + case 'J': + if (isImaginary) break; // Cannot be repeated. + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), + diag::ext_imaginary_constant); + isImaginary = true; + continue; // Success. + } + // If we reached here, there was an error. + break; + } + + // Report an error if there are any. 
+ if (s != ThisTokEnd) { + TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin); + Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant : + diag::err_invalid_suffix_integer_constant, + std::string(SuffixBegin, ThisTokEnd)); + return; + } +} + +/// GetIntegerValue - Convert this numeric literal value to an APInt that +/// matches Val's input width. If there is an overflow, set Val to the low bits +/// of the result and return true. Otherwise, return false. +bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { + Val = 0; + s = DigitsBegin; + + llvm::APInt RadixVal(Val.getBitWidth(), radix); + llvm::APInt CharVal(Val.getBitWidth(), 0); + llvm::APInt OldVal = Val; + + bool OverflowOccurred = false; + while (s < SuffixBegin) { + unsigned C = HexDigitValue(*s++); + + // If this letter is out of bound for this radix, reject it. + assert(C < radix && "NumericLiteralParser ctor should have rejected this"); + + CharVal = C; + + // Add the digit to the value in the appropriate radix. If adding in digits + // made the value smaller, then this overflowed. + OldVal = Val; + + // Multiply by radix, did overflow occur on the multiply? + Val *= RadixVal; + OverflowOccurred |= Val.udiv(RadixVal) != OldVal; + + OldVal = Val; + // Add value, did overflow occur on the value? 
+ Val += CharVal; + OverflowOccurred |= Val.ult(OldVal); + OverflowOccurred |= Val.ult(CharVal); + } + return OverflowOccurred; +} + +llvm::APFloat NumericLiteralParser:: +GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) { + using llvm::APFloat; + + llvm::SmallVector<char,256> floatChars; + for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i) + floatChars.push_back(ThisTokBegin[i]); + + floatChars.push_back('\0'); + + APFloat V (Format, APFloat::fcZero, false); + APFloat::opStatus status; + + status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven); + + if (isExact) + *isExact = status == APFloat::opOK; + + return V; +} + +void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &M) { + PP.Diag(Loc, DiagID, M); + hadError = true; +} + + +CharLiteralParser::CharLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP) { + // At this point we know that the character matches the regex "L?'.*'". + HadError = false; + Value = 0; + + // Determine if this is a wide character. + IsWide = begin[0] == 'L'; + if (IsWide) ++begin; + + // Skip over the entry quote. + assert(begin[0] == '\'' && "Invalid token lexed"); + ++begin; + + // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the + // size of "value". + assert(PP.getTargetInfo().getIntWidth() == 32 && + "Assumes sizeof(int) == 4 for now"); + // FIXME: This assumes that wchar_t is 32-bits for now. + assert(PP.getTargetInfo().getWCharWidth() == 32 && + "Assumes sizeof(wchar_t) == 4 for now"); + // FIXME: This extensively assumes that 'char' is 8-bits. + assert(PP.getTargetInfo().getCharWidth() == 8 && + "Assumes char is 8 bits"); + + bool isFirstChar = true; + bool isMultiChar = false; + while (begin[0] != '\'') { + unsigned ResultChar; + if (begin[0] != '\\') // If this is a normal character, consume it. + ResultChar = *begin++; + else // Otherwise, this is an escape character. 
+ ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP); + + // If this is a multi-character constant (e.g. 'abc'), handle it. These are + // implementation defined (C99 6.4.4.4p10). + if (!isFirstChar) { + // If this is the second character being processed, do special handling. + if (!isMultiChar) { + isMultiChar = true; + + // Warn about discarding the top bits for multi-char wide-character + // constants (L'abcd'). + if (IsWide) + PP.Diag(Loc, diag::warn_extraneous_wide_char_constant); + } + + if (IsWide) { + // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'. + Value = 0; + } else { + // Narrow character literals act as though their value is concatenated + // in this implementation. + if (((Value << 8) >> 8) != Value) + PP.Diag(Loc, diag::warn_char_constant_too_large); + Value <<= 8; + } + } + + Value += ResultChar; + isFirstChar = false; + } + + // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1") + // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple + // character constants are not sign extended in the this implementation: + // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC. + if (!IsWide && !isMultiChar && (Value & 128) && + PP.getTargetInfo().isCharSigned()) + Value = (signed char)Value; +} + + +/// string-literal: [C99 6.4.5] +/// " [s-char-sequence] " +/// L" [s-char-sequence] " +/// s-char-sequence: +/// s-char +/// s-char-sequence s-char +/// s-char: +/// any source character except the double quote ", +/// backslash \, or newline character +/// escape-character +/// universal-character-name +/// escape-character: [C99 6.4.4.4] +/// \ escape-code +/// universal-character-name +/// escape-code: +/// character-escape-code +/// octal-escape-code +/// hex-escape-code +/// character-escape-code: one of +/// n t b r f v a +/// \ ' " ? 
+/// octal-escape-code: +/// octal-digit +/// octal-digit octal-digit +/// octal-digit octal-digit octal-digit +/// hex-escape-code: +/// x hex-digit +/// hex-escape-code hex-digit +/// universal-character-name: +/// \u hex-quad +/// \U hex-quad hex-quad +/// hex-quad: +/// hex-digit hex-digit hex-digit hex-digit +/// +StringLiteralParser:: +StringLiteralParser(const Token *StringToks, unsigned NumStringToks, + Preprocessor &pp, TargetInfo &t) + : PP(pp), Target(t) { + // Scan all of the string portions, remember the max individual token length, + // computing a bound on the concatenated string length, and see whether any + // piece is a wide-string. If any of the string portions is a wide-string + // literal, the result is a wide-string literal [C99 6.4.5p4]. + MaxTokenLength = StringToks[0].getLength(); + SizeBound = StringToks[0].getLength()-2; // -2 for "". + AnyWide = StringToks[0].is(tok::wide_string_literal); + + hadError = false; + + // Implement Translation Phase #6: concatenation of string literals + /// (C99 5.1.1.2p1). The common case is only one string fragment. + for (unsigned i = 1; i != NumStringToks; ++i) { + // The string could be shorter than this if it needs cleaning, but this is a + // reasonable bound, which is all we need. + SizeBound += StringToks[i].getLength()-2; // -2 for "". + + // Remember maximum string piece length. + if (StringToks[i].getLength() > MaxTokenLength) + MaxTokenLength = StringToks[i].getLength(); + + // Remember if we see any wide strings. + AnyWide |= StringToks[i].is(tok::wide_string_literal); + } + + + // Include space for the null terminator. + ++SizeBound; + + // TODO: K&R warning: "traditional C rejects string constant concatenation" + + // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not + // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. 
+ wchar_tByteWidth = ~0U; + if (AnyWide) { + wchar_tByteWidth = Target.getWCharWidth(); + assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!"); + wchar_tByteWidth /= 8; + } + + // The output buffer size needs to be large enough to hold wide characters. + // This is a worst-case assumption which basically corresponds to L"" "long". + if (AnyWide) + SizeBound *= wchar_tByteWidth; + + // Size the temporary buffer to hold the result string data. + ResultBuf.resize(SizeBound); + + // Likewise, but for each string piece. + llvm::SmallString<512> TokenBuf; + TokenBuf.resize(MaxTokenLength); + + // Loop over all the strings, getting their spelling, and expanding them to + // wide strings as appropriate. + ResultPtr = &ResultBuf[0]; // Next byte to fill in. + + Pascal = false; + + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { + const char *ThisTokBuf = &TokenBuf[0]; + // Get the spelling of the token, which eliminates trigraphs, etc. We know + // that ThisTokBuf points to a buffer that is big enough for the whole token + // and 'spelled' tokens can only shrink. + unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); + const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + + // TODO: Input character set mapping support. + + // Skip L marker for wide strings. 
+ bool ThisIsWide = false; + if (ThisTokBuf[0] == 'L') { + ++ThisTokBuf; + ThisIsWide = true; + } + + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + ++ThisTokBuf; + + // Check if this is a pascal string + if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd && + ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + + // If the \p sequence is found in the first token, we have a pascal string + // Otherwise, if we already have a pascal string, ignore the first \p + if (i == 0) { + ++ThisTokBuf; + Pascal = true; + } else if (Pascal) + ThisTokBuf += 2; + } + + while (ThisTokBuf != ThisTokEnd) { + // Is this a span of non-escape characters? + if (ThisTokBuf[0] != '\\') { + const char *InStart = ThisTokBuf; + do { + ++ThisTokBuf; + } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); + + // Copy the character span over. + unsigned Len = ThisTokBuf-InStart; + if (!AnyWide) { + memcpy(ResultPtr, InStart, Len); + ResultPtr += Len; + } else { + // Note: our internal rep of wide char tokens is always little-endian. + for (; Len; --Len, ++InStart) { + *ResultPtr++ = InStart[0]; + // Add zeros at the end. + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + } + continue; + } + + // Otherwise, this is an escape character. Process it. + unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, + StringToks[i].getLocation(), + ThisIsWide, PP); + + // Note: our internal rep of wide char tokens is always little-endian. + *ResultPtr++ = ResultChar & 0xFF; + + if (AnyWide) { + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = ResultChar >> i*8; + } + } + } + + // Add zero terminator. 
+ *ResultPtr = 0; + if (AnyWide) { + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + + if (Pascal) + ResultBuf[0] = ResultPtr-&ResultBuf[0]-1; +} diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp new file mode 100644 index 00000000000..a26e50eb762 --- /dev/null +++ b/clang/lib/Lex/MacroArgs.cpp @@ -0,0 +1,225 @@ +//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TokenLexer interface. +// +//===----------------------------------------------------------------------===// + +#include "MacroArgs.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +using namespace clang; + +/// MacroArgs ctor function - This destroys the vector passed in. +MacroArgs *MacroArgs::create(const MacroInfo *MI, + const Token *UnexpArgTokens, + unsigned NumToks, bool VarargsElided) { + assert(MI->isFunctionLike() && + "Can't have args for an object-like macro!"); + + // Allocate memory for the MacroArgs object with the lexer tokens at the end. + MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) + + NumToks*sizeof(Token)); + // Construct the macroargs object. + new (Result) MacroArgs(NumToks, VarargsElided); + + // Copy the actual unexpanded tokens to immediately after the result ptr. + if (NumToks) + memcpy(const_cast<Token*>(Result->getUnexpArgument(0)), + UnexpArgTokens, NumToks*sizeof(Token)); + + return Result; +} + +/// destroy - Destroy and deallocate the memory for this object. +/// +void MacroArgs::destroy() { + // Run the dtor to deallocate the vectors. + this->~MacroArgs(); + // Release the memory for the object. 
+ free(this); +} + + +/// getArgLength - Given a pointer to an expanded or unexpanded argument, +/// return the number of tokens, not counting the EOF, that make up the +/// argument. +unsigned MacroArgs::getArgLength(const Token *ArgPtr) { + unsigned NumArgTokens = 0; + for (; ArgPtr->isNot(tok::eof); ++ArgPtr) + ++NumArgTokens; + return NumArgTokens; +} + + +/// getUnexpArgument - Return the unexpanded tokens for the specified formal. +/// +const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { + // The unexpanded argument tokens start immediately after the MacroArgs object + // in memory. + const Token *Start = (const Token *)(this+1); + const Token *Result = Start; + // Scan to find Arg. + for (; Arg; ++Result) { + assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); + if (Result->is(tok::eof)) + --Arg; + } + return Result; +} + + +/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected +/// by pre-expansion, return false. Otherwise, conservatively return true. +bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, + Preprocessor &PP) const { + // If there are no identifiers in the argument list, or if the identifiers are + // known to not be macros, pre-expansion won't modify it. + for (; ArgTok->isNot(tok::eof); ++ArgTok) + if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) { + if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled()) + // Return true even though the macro could be a function-like macro + // without a following '(' token. + return true; + } + return false; +} + +/// getPreExpArgument - Return the pre-expanded form of the specified +/// argument. +const std::vector<Token> & +MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { + assert(Arg < NumUnexpArgTokens && "Invalid argument number!"); + + // If we have already computed this, return it. 
+ if (PreExpArgTokens.empty()) + PreExpArgTokens.resize(NumUnexpArgTokens); + + std::vector<Token> &Result = PreExpArgTokens[Arg]; + if (!Result.empty()) return Result; + + const Token *AT = getUnexpArgument(Arg); + unsigned NumToks = getArgLength(AT)+1; // Include the EOF. + + // Otherwise, we have to pre-expand this argument, populating Result. To do + // this, we set up a fake TokenLexer to lex from the unexpanded argument + // list. With this installed, we lex expanded tokens until we hit the EOF + // token at the end of the unexp list. + PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, + false /*owns tokens*/); + + // Lex all of the macro-expanded tokens into Result. + do { + Result.push_back(Token()); + PP.Lex(Result.back()); + } while (Result.back().isNot(tok::eof)); + + // Pop the token stream off the top of the stack. We know that the internal + // pointer inside of it is to the "end" of the token stream, but the stack + // will not otherwise be popped until the next token is lexed. The problem is + // that the token may be lexed sometime after the vector of tokens itself is + // destroyed, which would be badness. + PP.RemoveTopOfLexerStack(); + return Result; +} + + +/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of +/// tokens into the literal string token that should be produced by the C # +/// preprocessor operator. If Charify is true, then it should be turned into +/// a character literal for the Microsoft charize (#@) extension. +/// +Token MacroArgs::StringifyArgument(const Token *ArgToks, + Preprocessor &PP, bool Charify) { + Token Tok; + Tok.startToken(); + Tok.setKind(tok::string_literal); + + const Token *ArgTokStart = ArgToks; + + // Stringify all the tokens. + std::string Result = "\""; + // FIXME: Optimize this loop to not use std::strings. 
+ bool isFirst = true; + for (; ArgToks->isNot(tok::eof); ++ArgToks) { + const Token &Tok = *ArgToks; + if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) + Result += ' '; + isFirst = false; + + // If this is a string or character constant, escape the token as specified + // by 6.10.3.2p2. + if (Tok.is(tok::string_literal) || // "foo" + Tok.is(tok::wide_string_literal) || // L"foo" + Tok.is(tok::char_constant)) { // 'x' and L'x'. + Result += Lexer::Stringify(PP.getSpelling(Tok)); + } else { + // Otherwise, just append the token. + Result += PP.getSpelling(Tok); + } + } + + // If the last character of the string is a \, and if it isn't escaped, this + // is an invalid string literal, diagnose it as specified in C99. + if (Result[Result.size()-1] == '\\') { + // Count the number of consequtive \ characters. If even, then they are + // just escaped backslashes, otherwise it's an error. + unsigned FirstNonSlash = Result.size()-2; + // Guaranteed to find the starting " if nothing else. + while (Result[FirstNonSlash] == '\\') + --FirstNonSlash; + if ((Result.size()-1-FirstNonSlash) & 1) { + // Diagnose errors for things like: #define F(X) #X / F(\) + PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); + Result.erase(Result.end()-1); // remove one of the \'s. + } + } + Result += '"'; + + // If this is the charify operation and the result is not a legal character + // constant, diagnose it. + if (Charify) { + // First step, turn double quotes into single quotes: + Result[0] = '\''; + Result[Result.size()-1] = '\''; + + // Check for bogus character. + bool isBad = false; + if (Result.size() == 3) { + isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. + } else { + isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' + } + + if (isBad) { + PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); + Result = "' '"; // Use something arbitrary, but legal. 
}
  }

  Tok.setLength(Result.size());
  Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
  return Tok;
}

/// getStringifiedArgument - Compute, cache, and return the specified argument
/// that has been 'stringified' as required by the # operator.
///
/// ArgNo is asserted to be smaller than NumUnexpArgTokens.  The returned
/// reference points into the StringifiedArgs cache owned by this object.
const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
                                               Preprocessor &PP) {
  assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
  // The cache is built lazily on first use: one slot per getNumArguments(),
  // with every slot zero-filled.
  if (StringifiedArgs.empty()) {
    StringifiedArgs.resize(getNumArguments());
    memset(&StringifiedArgs[0], 0,
           sizeof(StringifiedArgs[0])*getNumArguments());
  }
  // A slot whose kind is not tok::string_literal is treated as "not computed
  // yet" and is filled in on demand.
  // NOTE(review): this relies on a zero-filled Token not reading as
  // tok::string_literal -- confirm against the Token/tok definitions.
  if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
    StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
  return StringifiedArgs[ArgNo];
}
diff --git a/clang/lib/Lex/MacroArgs.h b/clang/lib/Lex/MacroArgs.h new file mode 100644 index 00000000000..4b22fa18aa8 --- /dev/null +++ b/clang/lib/Lex/MacroArgs.h @@ -0,0 +1,109 @@
//===--- MacroArgs.h - Formal argument info for Macros ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the MacroArgs interface.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_MACROARGS_H
#define LLVM_CLANG_MACROARGS_H

#include <vector>

namespace clang {
  class MacroInfo;
  class Preprocessor;
  class Token;

/// MacroArgs - An instance of this class captures information about
/// the formal arguments specified to a function-like macro invocation.
///
/// The constructor and destructor are private; instances are obtained through
/// the static create() function and released through destroy().
class MacroArgs {
  /// NumUnexpArgTokens - The number of raw, unexpanded tokens for the
  /// arguments. All of the actual argument tokens are allocated immediately
  /// after the MacroArgs object in memory. This is all of the arguments
  /// concatenated together, with 'EOF' markers at the end of each argument.
  unsigned NumUnexpArgTokens;

  /// PreExpArgTokens - Pre-expanded tokens for arguments that need them. Empty
  /// if not yet computed. This includes the EOF marker at the end of the
  /// stream.
  std::vector<std::vector<Token> > PreExpArgTokens;

  /// StringifiedArgs - This contains arguments in 'stringified' form. If the
  /// stringified form of an argument has not yet been computed, this is empty.
  std::vector<Token> StringifiedArgs;

  /// VarargsElided - True if this is a C99 style varargs macro invocation and
  /// there was no argument specified for the "..." argument. If the argument
  /// was specified (even empty) or this isn't a C99 style varargs function, or
  /// if in strict mode and the C99 varargs macro had only a ... argument, this
  /// is false.
  bool VarargsElided;

  /// Private constructor: records the token count and the varargs-elided flag.
  MacroArgs(unsigned NumToks, bool varargsElided)
    : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided) {}
  /// Private destructor.
  ~MacroArgs() {}
public:
  /// MacroArgs ctor function - Create a new MacroArgs object with the specified
  /// macro and argument info.
  static MacroArgs *create(const MacroInfo *MI,
                           const Token *UnexpArgTokens,
                           unsigned NumArgTokens, bool VarargsElided);

  /// destroy - Destroy and deallocate the memory for this object.
  ///
  void destroy();

  /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
  /// by pre-expansion, return false. Otherwise, conservatively return true.
  bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const;

  /// getUnexpArgument - Return a pointer to the first token of the unexpanded
  /// token list for the specified formal.
  ///
  const Token *getUnexpArgument(unsigned Arg) const;

  /// getArgLength - Given a pointer to an expanded or unexpanded argument,
  /// return the number of tokens, not counting the EOF, that make up the
  /// argument.
  static unsigned getArgLength(const Token *ArgPtr);

  /// getPreExpArgument - Return the pre-expanded form of the specified
  /// argument.
  const std::vector<Token> &
    getPreExpArgument(unsigned Arg, Preprocessor &PP);

  /// getStringifiedArgument - Compute, cache, and return the specified argument
  /// that has been 'stringified' as required by the # operator.
  const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP);

  /// getNumArguments - Return the number of arguments passed into this macro
  /// invocation.
  /// NOTE(review): this returns NumUnexpArgTokens, which is documented above as
  /// a count of tokens (including the per-argument EOF markers) rather than a
  /// count of arguments -- confirm which one callers expect.
  unsigned getNumArguments() const { return NumUnexpArgTokens; }


  /// isVarargsElidedUse - Return true if this is a C99 style varargs macro
  /// invocation and there was no argument specified for the "..." argument. If
  /// the argument was specified (even empty) or this isn't a C99 style varargs
  /// function, or if in strict mode and the C99 varargs macro had only a ...
  /// argument, this returns false.
  bool isVarargsElidedUse() const { return VarargsElided; }

  /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
  /// tokens into the literal string token that should be produced by the C #
  /// preprocessor operator. If Charify is true, then it should be turned into
  /// a character literal for the Microsoft charize (#@) extension.
  ///
  static Token StringifyArgument(const Token *ArgToks,
                                 Preprocessor &PP, bool Charify = false);
};

}  // end namespace clang

#endif
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp new file mode 100644 index 00000000000..de19ff502a6 --- /dev/null +++ b/clang/lib/Lex/MacroInfo.cpp @@ -0,0 +1,70 @@
//===--- MacroInfo.cpp - Information about #defined identifiers -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements the MacroInfo interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" +using namespace clang; + +MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { + IsFunctionLike = false; + IsC99Varargs = false; + IsGNUVarargs = false; + IsBuiltinMacro = false; + IsDisabled = false; + IsUsed = true; + + ArgumentList = 0; + NumArguments = 0; +} + +/// isIdenticalTo - Return true if the specified macro definition is equal to +/// this macro in spelling, arguments, and whitespace. This is used to emit +/// duplicate definition warnings. This implements the rules in C99 6.10.3. +/// +bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { + // Check # tokens in replacement, number of args, and various flags all match. + if (ReplacementTokens.size() != Other.ReplacementTokens.size() || + getNumArgs() != Other.getNumArgs() || + isFunctionLike() != Other.isFunctionLike() || + isC99Varargs() != Other.isC99Varargs() || + isGNUVarargs() != Other.isGNUVarargs()) + return false; + + // Check arguments. + for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); + I != E; ++I, ++OI) + if (*I != *OI) return false; + + // Check all the tokens. + for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { + const Token &A = ReplacementTokens[i]; + const Token &B = Other.ReplacementTokens[i]; + if (A.getKind() != B.getKind() || + A.isAtStartOfLine() != B.isAtStartOfLine() || + A.hasLeadingSpace() != B.hasLeadingSpace()) + return false; + + // If this is an identifier, it is easy. + if (A.getIdentifierInfo() || B.getIdentifierInfo()) { + if (A.getIdentifierInfo() != B.getIdentifierInfo()) + return false; + continue; + } + + // Otherwise, check the spelling. 
+ if (PP.getSpelling(A) != PP.getSpelling(B)) + return false; + } + + return true; +} diff --git a/clang/lib/Lex/Makefile b/clang/lib/Lex/Makefile new file mode 100644 index 00000000000..187448c9922 --- /dev/null +++ b/clang/lib/Lex/Makefile @@ -0,0 +1,28 @@ +##===- clang/lib/Lex/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Lexer library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +include $(LEVEL)/Makefile.config + +LIBRARYNAME := clangLex +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +ifeq ($(ARCH),PowerPC) +CXXFLAGS += -maltivec +endif + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp new file mode 100644 index 00000000000..b24f5b63760 --- /dev/null +++ b/clang/lib/Lex/PPDirectives.cpp @@ -0,0 +1,1153 @@ +//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements # directive processing for the Preprocessor. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Utility Methods for Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the +/// current line until the tok::eom token is found. +void Preprocessor::DiscardUntilEndOfDirective() { + Token Tmp; + do { + LexUnexpandedToken(Tmp); + } while (Tmp.isNot(tok::eom)); +} + +/// isCXXNamedOperator - Returns "true" if the token is a named operator in C++. +static bool isCXXNamedOperator(const std::string &Spelling) { + return Spelling == "and" || Spelling == "bitand" || Spelling == "bitor" || + Spelling == "compl" || Spelling == "not" || Spelling == "not_eq" || + Spelling == "or" || Spelling == "xor"; +} + +/// ReadMacroName - Lex and validate a macro name, which occurs after a +/// #define or #undef. This sets the token kind to eom and discards the rest +/// of the macro line if the macro name is invalid. isDefineUndef is 1 if +/// this is due to a a #define, 2 if #undef directive, 0 if it is something +/// else (e.g. #ifdef). +void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { + // Read the token, don't allow macro expansion on it. + LexUnexpandedToken(MacroNameTok); + + // Missing macro name? 
+ if (MacroNameTok.is(tok::eom)) + return Diag(MacroNameTok, diag::err_pp_missing_macro_name); + + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); + if (II == 0) { + std::string Spelling = getSpelling(MacroNameTok); + if (isCXXNamedOperator(Spelling)) + // C++ 2.5p2: Alternative tokens behave the same as its primary token + // except for their spellings. + Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name, Spelling); + else + Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + // Fall through on error. + } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) { + // Error if defining "defined": C99 6.10.8.4. + Diag(MacroNameTok, diag::err_defined_macro_name); + } else if (isDefineUndef && II->hasMacroDefinition() && + getMacroInfo(II)->isBuiltinMacro()) { + // Error if defining "__LINE__" and other builtins: C99 6.10.8.4. + if (isDefineUndef == 1) + Diag(MacroNameTok, diag::pp_redef_builtin_macro); + else + Diag(MacroNameTok, diag::pp_undef_builtin_macro); + } else { + // Okay, we got a good identifier node. Return it. + return; + } + + // Invalid macro name, read and discard the rest of the line. Then set the + // token kind to tok::eom. + MacroNameTok.setKind(tok::eom); + return DiscardUntilEndOfDirective(); +} + +/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If +/// not, emit a diagnostic and consume up until the eom. +void Preprocessor::CheckEndOfDirective(const char *DirType) { + Token Tmp; + // Lex unexpanded tokens: macros might expand to zero tokens, causing us to + // miss diagnosing invalid lines. + LexUnexpandedToken(Tmp); + + // There should be no tokens after the directive, but we allow them as an + // extension. + while (Tmp.is(tok::comment)) // Skip comments in -C mode. 
+ LexUnexpandedToken(Tmp); + + if (Tmp.isNot(tok::eom)) { + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType); + DiscardUntilEndOfDirective(); + } +} + + + +/// SkipExcludedConditionalBlock - We just read a #if or related directive and +/// decided that the subsequent tokens are in the #if'd out portion of the +/// file. Lex the rest of the file, until we see an #endif. If +/// FoundNonSkipPortion is true, then we have already emitted code for part of +/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk +/// is true, then #else directives are ok, if not, then we have already seen one +/// so a #else directive is a duplicate. When this returns, the caller can lex +/// the first valid token. +void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, + bool FoundNonSkipPortion, + bool FoundElse) { + ++NumSkipped; + assert(CurTokenLexer == 0 && CurLexer && + "Lexing a macro, not a file?"); + + CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, + FoundNonSkipPortion, FoundElse); + + // Enter raw mode to disable identifier lookup (and thus macro expansion), + // disabling warnings, etc. + CurLexer->LexingRawMode = true; + Token Tok; + while (1) { + CurLexer->Lex(Tok); + + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // Emit errors for each unterminated conditional on the stack, including + // the current one. + while (!CurLexer->ConditionalStack.empty()) { + Diag(CurLexer->ConditionalStack.back().IfLoc, + diag::err_pp_unterminated_conditional); + CurLexer->ConditionalStack.pop_back(); + } + + // Just return and let the caller lex after this #include. + break; + } + + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + // We just parsed a # character at the start of a line, so we're in + // directive mode. 
Tell the lexer this so any newlines we see will be + // converted into an EOM token (this terminates the macro). + CurLexer->ParsingPreprocessorDirective = true; + CurLexer->KeepCommentMode = false; + + + // Read the next token, the directive flavor. + LexUnexpandedToken(Tok); + + // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or + // something bogus), skip it. + if (Tok.isNot(tok::identifier)) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // If the first letter isn't i or e, it isn't intesting to us. We know that + // this is safe in the face of spelling differences, because there is no way + // to spell an i/e in a strange way that is another letter. Skipping this + // allows us to avoid looking up the identifier info for #define/#undef and + // other common directives. + const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation()); + char FirstChar = RawCharData[0]; + if (FirstChar >= 'a' && FirstChar <= 'z' && + FirstChar != 'i' && FirstChar != 'e') { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // Get the identifier name without trigraphs or embedded newlines. Note + // that we can't use Tok.getIdentifierInfo() because its lookup is disabled + // when skipping. + // TODO: could do this with zero copies in the no-clean case by using + // strncmp below. + char Directive[20]; + unsigned IdLen; + if (!Tok.needsCleaning() && Tok.getLength() < 20) { + IdLen = Tok.getLength(); + memcpy(Directive, RawCharData, IdLen); + Directive[IdLen] = 0; + } else { + std::string DirectiveStr = getSpelling(Tok); + IdLen = DirectiveStr.size(); + if (IdLen >= 20) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. 
+ CurLexer->KeepCommentMode = KeepComments; + continue; + } + memcpy(Directive, &DirectiveStr[0], IdLen); + Directive[IdLen] = 0; + } + + if (FirstChar == 'i' && Directive[1] == 'f') { + if ((IdLen == 2) || // "if" + (IdLen == 5 && !strcmp(Directive+2, "def")) || // "ifdef" + (IdLen == 6 && !strcmp(Directive+2, "ndef"))) { // "ifndef" + // We know the entire #if/#ifdef/#ifndef block will be skipped, don't + // bother parsing the condition. + DiscardUntilEndOfDirective(); + CurLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, + /*foundnonskip*/false, + /*fnddelse*/false); + } + } else if (FirstChar == 'e') { + if (IdLen == 5 && !strcmp(Directive+1, "ndif")) { // "endif" + CheckEndOfDirective("#endif"); + PPConditionalInfo CondInfo; + CondInfo.WasSkipping = true; // Silence bogus warning. + bool InCond = CurLexer->popConditionalLevel(CondInfo); + InCond = InCond; // Silence warning in no-asserts mode. + assert(!InCond && "Can't be skipping if not in a conditional!"); + + // If we popped the outermost skipping block, we're done skipping! + if (!CondInfo.WasSkipping) + break; + } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else". + // #else directive in a skipping conditional. If not in some other + // skipping conditional, and if #else hasn't already been seen, enter it + // as a non-skipping conditional. + CheckEndOfDirective("#else"); + PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + // If this is a #else with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else); + + // Note that we've seen a #else in this conditional. + CondInfo.FoundElse = true; + + // If the conditional is at the top level, and the #if block wasn't + // entered, enter the #else block now. + if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { + CondInfo.FoundNonSkip = true; + break; + } + } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) { // "elif". 
+ PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + bool ShouldEnter; + // If this is in a skipping block or if we're already handled this #if + // block, don't bother parsing the condition. + if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) { + DiscardUntilEndOfDirective(); + ShouldEnter = false; + } else { + // Restore the value of LexingRawMode so that identifiers are + // looked up, etc, inside the #elif expression. + assert(CurLexer->LexingRawMode && "We have to be skipping here!"); + CurLexer->LexingRawMode = false; + IdentifierInfo *IfNDefMacro = 0; + ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro); + CurLexer->LexingRawMode = true; + } + + // If this is a #elif with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); + + // If this condition is true, enter it! + if (ShouldEnter) { + CondInfo.FoundNonSkip = true; + break; + } + } + } + + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + } + + // Finally, if we are out of the conditional (saw an #endif or ran off the end + // of the file, just stop skipping and return to lexing whatever came after + // the #if block. + CurLexer->LexingRawMode = false; +} + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). +const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir) { + // If the header lookup mechanism may be relative to the current file, pass in + // info about where the current file is. 
+ const FileEntry *CurFileEnt = 0; + if (!FromDir) { + SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc(); + CurFileEnt = SourceMgr.getFileEntryForLoc(FileLoc); + } + + // Do a standard file entry lookup. + CurDir = CurDirLookup; + const FileEntry *FE = + HeaderInfo.LookupFile(FilenameStart, FilenameEnd, + isAngled, FromDir, CurDir, CurFileEnt); + if (FE) return FE; + + // Otherwise, see if this is a subframework header. If so, this is relative + // to one of the headers on the #include stack. Walk the list of the current + // headers on the #include stack and pass them to HeaderInfo. + if (CurLexer && !CurLexer->Is_PragmaLexer) { + if ((CurFileEnt = SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))) + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1]; + if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) { + if ((CurFileEnt = + SourceMgr.getFileEntryForLoc(ISEntry.TheLexer->getFileLoc()))) + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, + FilenameEnd, CurFileEnt))) + return FE; + } + } + + // Otherwise, we really couldn't find the file. + return 0; +} + + +//===----------------------------------------------------------------------===// +// Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleDirective - This callback is invoked when the lexer sees a # token +/// at the start of a line. This consumes the directive, modifies the +/// lexer/preprocessor state, and advances the lexer(s) so that the next token +/// read is the correct one. +void Preprocessor::HandleDirective(Token &Result) { + // FIXME: Traditional: # with whitespace before it not recognized by K&R? + + // We just parsed a # character at the start of a line, so we're in directive + // mode. 
Tell the lexer this so any newlines we see will be converted into an + // EOM token (which terminates the directive). + CurLexer->ParsingPreprocessorDirective = true; + + ++NumDirectives; + + // We are about to read a token. For the multiple-include optimization FA to + // work, we have to remember if we had read any tokens *before* this + // pp-directive. + bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal(); + + // Read the next token, the directive flavor. This isn't expanded due to + // C99 6.10.3p8. + LexUnexpandedToken(Result); + + // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: + // #define A(x) #x + // A(abc + // #warning blah + // def) + // If so, the user is relying on non-portable behavior, emit a diagnostic. + if (InMacroArgs) + Diag(Result, diag::ext_embedded_directive); + +TryAgain: + switch (Result.getKind()) { + case tok::eom: + return; // null directive. + case tok::comment: + // Handle stuff like "# /*foo*/ define X" in -E -C mode. + LexUnexpandedToken(Result); + goto TryAgain; + + case tok::numeric_constant: + // FIXME: implement # 7 line numbers! + DiscardUntilEndOfDirective(); + return; + default: + IdentifierInfo *II = Result.getIdentifierInfo(); + if (II == 0) break; // Not an identifier. + + // Ask what the preprocessor keyword ID is. + switch (II->getPPKeywordID()) { + default: break; + // C99 6.10.1 - Conditional Inclusion. + case tok::pp_if: + return HandleIfDirective(Result, ReadAnyTokensBeforeDirective); + case tok::pp_ifdef: + return HandleIfdefDirective(Result, false, true/*not valid for miopt*/); + case tok::pp_ifndef: + return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective); + case tok::pp_elif: + return HandleElifDirective(Result); + case tok::pp_else: + return HandleElseDirective(Result); + case tok::pp_endif: + return HandleEndifDirective(Result); + + // C99 6.10.2 - Source File Inclusion. 
+ case tok::pp_include: + return HandleIncludeDirective(Result); // Handle #include. + + // C99 6.10.3 - Macro Replacement. + case tok::pp_define: + return HandleDefineDirective(Result); + case tok::pp_undef: + return HandleUndefDirective(Result); + + // C99 6.10.4 - Line Control. + case tok::pp_line: + // FIXME: implement #line + DiscardUntilEndOfDirective(); + return; + + // C99 6.10.5 - Error Directive. + case tok::pp_error: + return HandleUserDiagnosticDirective(Result, false); + + // C99 6.10.6 - Pragma Directive. + case tok::pp_pragma: + return HandlePragmaDirective(); + + // GNU Extensions. + case tok::pp_import: + return HandleImportDirective(Result); + case tok::pp_include_next: + return HandleIncludeNextDirective(Result); + + case tok::pp_warning: + Diag(Result, diag::ext_pp_warning_directive); + return HandleUserDiagnosticDirective(Result, true); + case tok::pp_ident: + return HandleIdentSCCSDirective(Result); + case tok::pp_sccs: + return HandleIdentSCCSDirective(Result); + case tok::pp_assert: + //isExtension = true; // FIXME: implement #assert + break; + case tok::pp_unassert: + //isExtension = true; // FIXME: implement #unassert + break; + } + break; + } + + // If we reached here, the preprocessing token is not valid! + Diag(Result, diag::err_pp_invalid_directive); + + // Read the rest of the PP line. + DiscardUntilEndOfDirective(); + + // Okay, we're done parsing the directive. +} + +void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, + bool isWarning) { + // Read the rest of the line raw. We do this because we don't want macros + // to be expanded and we don't require that the tokens be valid preprocessing + // tokens. For example, this is allowed: "#warning ` 'foo". GCC does + // collapse multiple consequtive white space between tokens, but this isn't + // specified by the standard. + std::string Message = CurLexer->ReadToEndOfLine(); + + unsigned DiagID = isWarning ? 
diag::pp_hash_warning : diag::err_pp_hash_error; + return Diag(Tok, DiagID, Message); +} + +/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. +/// +void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { + // Yes, this directive is an extension. + Diag(Tok, diag::ext_pp_ident_directive); + + // Read the string argument. + Token StrTok; + Lex(StrTok); + + // If the token kind isn't a string, it's a malformed directive. + if (StrTok.isNot(tok::string_literal) && + StrTok.isNot(tok::wide_string_literal)) + return Diag(StrTok, diag::err_pp_malformed_ident); + + // Verify that there is nothing after the string, other than EOM. + CheckEndOfDirective("#ident"); + + if (Callbacks) + Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok)); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Include Directive Handling. +//===----------------------------------------------------------------------===// + +/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully +/// checked and spelled filename, e.g. as an operand of #include. This returns +/// true if the input filename was in <>'s or false if it were in ""'s. The +/// caller is expected to provide a buffer that is large enough to hold the +/// spelling of the filename, but is also expected to handle the case when +/// this method decides to use a different buffer. +bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, + const char *&BufStart, + const char *&BufEnd) { + // Get the text form of the filename. + assert(BufStart != BufEnd && "Can't have tokens with empty spellings!"); + + // Make sure the filename is <x> or "x". 
+ bool isAngled; + if (BufStart[0] == '<') { + if (BufEnd[-1] != '>') { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = true; + } else if (BufStart[0] == '"') { + if (BufEnd[-1] != '"') { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = false; + } else { + Diag(Loc, diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + + // Diagnose #include "" as invalid. + if (BufEnd-BufStart <= 2) { + Diag(Loc, diag::err_pp_empty_filename); + BufStart = 0; + return ""; + } + + // Skip the brackets. + ++BufStart; + --BufEnd; + return isAngled; +} + +/// ConcatenateIncludeName - Handle cases where the #include name is expanded +/// from a macro as multiple tokens, which need to be glued together. This +/// occurs for code like: +/// #define FOO <a/b.h> +/// #include FOO +/// because in this case, "<a/b.h>" is returned as 7 tokens, not one. +/// +/// This code concatenates and consumes tokens up to the '>' token. It returns +/// false if the > was found, otherwise it returns true if it finds and consumes +/// the EOM marker. +static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer, + Preprocessor &PP) { + Token CurTok; + + PP.Lex(CurTok); + while (CurTok.isNot(tok::eom)) { + // Append the spelling of this token to the buffer. If there was a space + // before it, add it now. + if (CurTok.hasLeadingSpace()) + FilenameBuffer.push_back(' '); + + // Get the spelling of the token, directly into FilenameBuffer if possible. + unsigned PreAppendSize = FilenameBuffer.size(); + FilenameBuffer.resize(PreAppendSize+CurTok.getLength()); + + const char *BufPtr = &FilenameBuffer[PreAppendSize]; + unsigned ActualLen = PP.getSpelling(CurTok, BufPtr); + + // If the token was spelled somewhere else, copy it into FilenameBuffer. 
+ if (BufPtr != &FilenameBuffer[PreAppendSize]) + memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); + + // Resize FilenameBuffer to the correct size. + if (CurTok.getLength() != ActualLen) + FilenameBuffer.resize(PreAppendSize+ActualLen); + + // If we found the '>' marker, return success. + if (CurTok.is(tok::greater)) + return false; + + PP.Lex(CurTok); + } + + // If we hit the eom marker, emit an error and return true so that the caller + // knows the EOM has been read. + PP.Diag(CurTok.getLocation(), diag::err_pp_expects_filename); + return true; +} + +/// HandleIncludeDirective - The "#include" tokens have just been read, read the +/// file to be included from the lexer, then include it! This is a common +/// routine with functionality shared between #include, #include_next and +/// #import. +void Preprocessor::HandleIncludeDirective(Token &IncludeTok, + const DirectoryLookup *LookupFrom, + bool isImport) { + + Token FilenameTok; + CurLexer->LexIncludeFilename(FilenameTok); + + // Reserve a buffer to get the spelling. + llvm::SmallVector<char, 128> FilenameBuffer; + const char *FilenameStart, *FilenameEnd; + + switch (FilenameTok.getKind()) { + case tok::eom: + // If the token kind is EOM, the error has already been diagnosed. + return; + + case tok::angle_string_literal: + case tok::string_literal: { + FilenameBuffer.resize(FilenameTok.getLength()); + FilenameStart = &FilenameBuffer[0]; + unsigned Len = getSpelling(FilenameTok, FilenameStart); + FilenameEnd = FilenameStart+Len; + break; + } + + case tok::less: + // This could be a <foo/bar.h> file coming from a macro expansion. In this + // case, glue the tokens together into FilenameBuffer and interpret those. + FilenameBuffer.push_back('<'); + if (ConcatenateIncludeName(FilenameBuffer, *this)) + return; // Found <eom> but no ">"? Diagnostic already emitted. 
+ FilenameStart = &FilenameBuffer[0]; + FilenameEnd = &FilenameBuffer[FilenameBuffer.size()]; + break; + default: + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + DiscardUntilEndOfDirective(); + return; + } + + bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), + FilenameStart, FilenameEnd); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (FilenameStart == 0) { + DiscardUntilEndOfDirective(); + return; + } + + // Verify that there is nothing after the filename, other than EOM. Use the + // preprocessor to lex this in case lexing the filename entered a macro. + CheckEndOfDirective("#include"); + + // Check that we don't have infinite #include recursion. + if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) + return Diag(FilenameTok, diag::err_pp_include_too_deep); + + // Search include directories. + const DirectoryLookup *CurDir; + const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, + isAngled, LookupFrom, CurDir); + if (File == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Ask HeaderInfo if we should enter this #include file. + if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) { + // If it returns true, #including this file will have no effect. + return; + } + + // Look up the file, create a File ID for it. + unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation()); + if (FileID == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Finally, if all is good, enter the new file! + EnterSourceFile(FileID, CurDir); +} + +/// HandleIncludeNextDirective - Implements #include_next. +/// +void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) { + Diag(IncludeNextTok, diag::ext_pp_include_next_directive); + + // #include_next is like #include, except that we start searching after + // the current found directory. 
If we can't do this, issue a + // diagnostic. + const DirectoryLookup *Lookup = CurDirLookup; + if (isInPrimaryFile()) { + Lookup = 0; + Diag(IncludeNextTok, diag::pp_include_next_in_primary); + } else if (Lookup == 0) { + Diag(IncludeNextTok, diag::pp_include_next_absolute_path); + } else { + // Start looking up in the next directory. + ++Lookup; + } + + return HandleIncludeDirective(IncludeNextTok, Lookup); +} + +/// HandleImportDirective - Implements #import. +/// +void Preprocessor::HandleImportDirective(Token &ImportTok) { + Diag(ImportTok, diag::ext_pp_import_directive); + + return HandleIncludeDirective(ImportTok, 0, true); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Macro Directive Handling. +//===----------------------------------------------------------------------===// + +/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro +/// definition has just been read. Lex the rest of the arguments and the +/// closing ), updating MI with what we learn. Return true if an error occurs +/// parsing the arg list. +bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { + llvm::SmallVector<IdentifierInfo*, 32> Arguments; + + Token Tok; + while (1) { + LexUnexpandedToken(Tok); + switch (Tok.getKind()) { + case tok::r_paren: + // Found the end of the argument list. + if (Arguments.empty()) { // #define FOO() + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + } + // Otherwise we have #define FOO(A,) + Diag(Tok, diag::err_pp_expected_ident_in_arg_list); + return true; + case tok::ellipsis: // #define X(... -> C99 varargs + // Warn if use of C99 feature in non-C99 mode. + if (!Features.C99) Diag(Tok, diag::ext_variadic_macro); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + // Add the __VA_ARGS__ identifier as an argument. 
+ Arguments.push_back(Ident__VA_ARGS__); + MI->setIsC99Varargs(); + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + case tok::eom: // #define X( + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + default: + // Handle keywords and identifiers here to accept things like + // #define Foo(for) for. + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (II == 0) { + // #define X(1 + Diag(Tok, diag::err_pp_invalid_tok_in_arg_list); + return true; + } + + // If this is already used as an argument, it is used multiple times (e.g. + // #define X(A,A. + if (std::find(Arguments.begin(), Arguments.end(), II) != + Arguments.end()) { // C99 6.10.3p6 + Diag(Tok, diag::err_pp_duplicate_name_in_arg_list, II->getName()); + return true; + } + + // Add the argument to the macro info. + Arguments.push_back(II); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + default: // #define X(A B + Diag(Tok, diag::err_pp_expected_comma_in_arg_list); + return true; + case tok::r_paren: // #define X(A) + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + case tok::comma: // #define X(A, + break; + case tok::ellipsis: // #define X(A... -> GCC extension + // Diagnose extension. + Diag(Tok, diag::ext_named_variadic_macro); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + + MI->setIsGNUVarargs(); + MI->setArgumentList(Arguments.begin(), Arguments.end()); + return false; + } + } + } +} + +/// HandleDefineDirective - Implements #define. This consumes the entire macro +/// line then lets the caller lex the next real token. +void Preprocessor::HandleDefineDirective(Token &DefineTok) { + ++NumDefined; + + Token MacroNameTok; + ReadMacroName(MacroNameTok, 1); + + // Error reading macro name? If so, diagnostic already issued. 
+ if (MacroNameTok.is(tok::eom)) + return; + + // If we are supposed to keep comments in #defines, reenable comment saving + // mode. + CurLexer->KeepCommentMode = KeepMacroComments; + + // Create the new macro. + MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation()); + + Token Tok; + LexUnexpandedToken(Tok); + + // If this is a function-like macro definition, parse the argument list, + // marking each of the identifiers as being used as macro arguments. Also, + // check other constraints on the first token of the macro body. + if (Tok.is(tok::eom)) { + // If there is no body to this macro, we have no special handling here. + } else if (Tok.is(tok::l_paren) && !Tok.hasLeadingSpace()) { + // This is a function-like macro definition. Read the argument list. + MI->setIsFunctionLike(); + if (ReadMacroDefinitionArgList(MI)) { + // Forget about MI. + delete MI; + // Throw away the rest of the line. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); + return; + } + + // Read the first token after the arg list for down below. + LexUnexpandedToken(Tok); + } else if (!Tok.hasLeadingSpace()) { + // C99 requires whitespace between the macro definition and the body. Emit + // a diagnostic for something like "#define X+". + if (Features.C99) { + Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); + } else { + // FIXME: C90/C++ do not get this diagnostic, but it does get a similar + // one in some cases! + } + } else { + // This is a normal token with leading space. Clear the leading space + // marker on the first token to get proper expansion. + Tok.clearFlag(Token::LeadingSpace); + } + + // If this is a definition of a variadic C99 function-like macro, not using + // the GNU named varargs extension, enabled __VA_ARGS__. + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. 
+ assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!"); + if (MI->isC99Varargs()) + Ident__VA_ARGS__->setIsPoisoned(false); + + // Read the rest of the macro body. + if (MI->isObjectLike()) { + // Object-like macros are very simple, just read their body. + while (Tok.isNot(tok::eom)) { + MI->AddTokenToBody(Tok); + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + + } else { + // Otherwise, read the body of a function-like macro. This has to validate + // the # (stringize) operator. + while (Tok.isNot(tok::eom)) { + MI->AddTokenToBody(Tok); + + // Check C99 6.10.3.2p1: ensure that # operators are followed by macro + // parameters in function-like macro expansions. + if (Tok.isNot(tok::hash)) { + // Get the next token of the macro. + LexUnexpandedToken(Tok); + continue; + } + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + + // Not a macro arg identifier? + if (!Tok.getIdentifierInfo() || + MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) { + Diag(Tok, diag::err_pp_stringize_not_parameter); + delete MI; + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + return; + } + + // Things look ok, add the param name token to the macro. + MI->AddTokenToBody(Tok); + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + } + + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + + // Check that there is no paste (##) operator at the begining or end of the + // replacement list. + unsigned NumTokens = MI->getNumTokens(); + if (NumTokens != 0) { + if (MI->getReplacementToken(0).is(tok::hashhash)) { + Diag(MI->getReplacementToken(0), diag::err_paste_at_start); + delete MI; + return; + } + if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) { + Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end); + delete MI; + return; + } + } + + // If this is the primary source file, remember that this macro hasn't been + // used yet. 
+ if (isInPrimaryFile()) + MI->setIsUsed(false); + + // Finally, if this identifier already had a macro defined for it, verify that + // the macro bodies are identical and free the old definition. + if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) { + if (!OtherMI->isUsed()) + Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Macros must be identical. This means all tokes and whitespace separation + // must be the same. C99 6.10.3.2. + if (!MI->isIdenticalTo(*OtherMI, *this)) { + Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef, + MacroNameTok.getIdentifierInfo()->getName()); + Diag(OtherMI->getDefinitionLoc(), diag::ext_pp_macro_redef2); + } + delete OtherMI; + } + + setMacroInfo(MacroNameTok.getIdentifierInfo(), MI); +} + +/// HandleUndefDirective - Implements #undef. +/// +void Preprocessor::HandleUndefDirective(Token &UndefTok) { + ++NumUndefined; + + Token MacroNameTok; + ReadMacroName(MacroNameTok, 2); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eom)) + return; + + // Check to see if this is the last token on the #undef line. + CheckEndOfDirective("#undef"); + + // Okay, we finally have a valid identifier to undef. + MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + + // If the macro is not defined, this is a noop undef, just return. + if (MI == 0) return; + + if (!MI->isUsed()) + Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Free macro definition. + delete MI; + setMacroInfo(MacroNameTok.getIdentifierInfo(), 0); +} + + +//===----------------------------------------------------------------------===// +// Preprocessor Conditional Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is +/// true when this is a #ifndef directive. 
ReadAnyTokensBeforeDirective is true +/// if any tokens have been returned or pp-directives activated before this +/// #ifndef has been lexed. +/// +void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + Token DirectiveTok = Result; + + Token MacroNameTok; + ReadMacroName(MacroNameTok); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eom)) { + // Skip code until we get to #endif. This helps with recovery by not + // emitting an error when the #endif is reached. + SkipExcludedConditionalBlock(DirectiveTok.getLocation(), + /*Foundnonskip*/false, /*FoundElse*/false); + return; + } + + // Check to see if this is the last token on the #if[n]def line. + CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef"); + + if (CurLexer->getConditionalStackDepth() == 0) { + // If the start of a top-level #ifdef, inform MIOpt. + if (!ReadAnyTokensBeforeDirective) { + assert(isIfndef && "#ifdef shouldn't reach here"); + CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo()); + } else + CurLexer->MIOpt.EnterTopLevelConditional(); + } + + IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); + MacroInfo *MI = getMacroInfo(MII); + + // If there is a macro, process it. + if (MI) // Mark it used. + MI->setIsUsed(true); + + // Should we include the stuff contained by this directive? + if (!MI == isIfndef) { + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(DirectiveTok.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(DirectiveTok.getLocation(), + /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleIfDirective - Implements the #if directive. 
+/// +void Preprocessor::HandleIfDirective(Token &IfToken, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + + // Parse and evaluation the conditional expression. + IdentifierInfo *IfNDefMacro = 0; + bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro); + + // Should we include the stuff contained by this directive? + if (ConditionalTrue) { + // If this condition is equivalent to #ifndef X, and if this is the first + // directive seen, handle it for the multiple-include optimization. + if (CurLexer->getConditionalStackDepth() == 0) { + if (!ReadAnyTokensBeforeDirective && IfNDefMacro) + CurLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro); + else + CurLexer->MIOpt.EnterTopLevelConditional(); + } + + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleEndifDirective - Implements the #endif directive. +/// +void Preprocessor::HandleEndifDirective(Token &EndifToken) { + ++NumEndif; + + // Check that this is the whole directive. + CheckEndOfDirective("#endif"); + + PPConditionalInfo CondInfo; + if (CurLexer->popConditionalLevel(CondInfo)) { + // No conditionals on the stack: this is an #endif without an #if. + return Diag(EndifToken, diag::err_pp_endif_without_if); + } + + // If this the end of a top-level #endif, inform MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.ExitTopLevelConditional(); + + assert(!CondInfo.WasSkipping && !CurLexer->LexingRawMode && + "This code should only be reachable in the non-skipping case!"); +} + + +void Preprocessor::HandleElseDirective(Token &Result) { + ++NumElse; + + // #else directive in a non-skipping conditional... start skipping. 
+ CheckEndOfDirective("#else"); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(Result, diag::pp_err_else_without_if); + + // If this is a top-level #else, inform the MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.EnterTopLevelConditional(); + + // If this is a #else with a #else before it, report the error. + if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/true); +} + +void Preprocessor::HandleElifDirective(Token &ElifToken) { + ++NumElse; + + // #elif directive in a non-skipping conditional... start skipping. + // We don't care what the condition is, because we will always skip it (since + // the block immediately before it was included). + DiscardUntilEndOfDirective(); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(ElifToken, diag::pp_err_elif_without_if); + + // If this is a top-level #elif, inform the MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.EnterTopLevelConditional(); + + // If this is a #elif with a #else before it, report the error. + if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/CI.FoundElse); +} + diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp new file mode 100644 index 00000000000..cca76289176 --- /dev/null +++ b/clang/lib/Lex/PPExpressions.cpp @@ -0,0 +1,639 @@ +//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor::EvaluateDirectiveExpression method, +// which parses and evaluates integer constant expressions for #if directives. +// +//===----------------------------------------------------------------------===// +// +// FIXME: implement testing for #assert's. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + Token &PeekTok, bool ValueLive, + Preprocessor &PP); + +/// DefinedTracker - This struct is used while parsing expressions to keep track +/// of whether !defined(X) has been seen. +/// +/// With this simple scheme, we handle the basic forms: +/// !defined(X) and !defined X +/// but we also trivially handle (silly) stuff like: +/// !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)). +struct DefinedTracker { + /// Each time a Value is evaluated, it returns information about whether the + /// parsed value is of the form defined(X), !defined(X) or is something else. + enum TrackerState { + DefinedMacro, // defined(X) + NotDefinedMacro, // !defined(X) + Unknown // Something else. + } State; + /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this + /// indicates the macro that was checked. + IdentifierInfo *TheMacro; +}; + + + +/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and +/// return the computed value in Result. Return true if there was an error +/// parsing. 
This function also returns information about the form of the +/// expression in DT. See above for information on what DT means. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateValue(llvm::APSInt &Result, Token &PeekTok, + DefinedTracker &DT, bool ValueLive, + Preprocessor &PP) { + Result = 0; + DT.State = DefinedTracker::Unknown; + + // If this token's spelling is a pp-identifier, check to see if it is + // 'defined' or if it is a macro. Note that we check here because many + // keywords are pp-identifiers, so we can't check the kind. + if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { + // If this identifier isn't 'defined' and it wasn't macro expanded, it turns + // into a simple 0, unless it is the C++ keyword "true", in which case it + // turns into "1". + if (II->getPPKeywordID() != tok::pp_defined) { + PP.Diag(PeekTok, diag::warn_pp_undef_identifier, II->getName()); + Result = II->getTokenID() == tok::kw_true; + Result.setIsUnsigned(false); // "0" is signed intmax_t 0. + PP.LexNonComment(PeekTok); + return false; + } + + // Handle "defined X" and "defined(X)". + + // Get the next token, don't expand it. + PP.LexUnexpandedToken(PeekTok); + + // Two options, it can either be a pp-identifier or a (. + bool InParens = false; + if (PeekTok.is(tok::l_paren)) { + // Found a paren, remember we saw it and skip it. + InParens = true; + PP.LexUnexpandedToken(PeekTok); + } + + // If we don't have a pp-identifier now, this is an error. + if ((II = PeekTok.getIdentifierInfo()) == 0) { + PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier); + return true; + } + + // Otherwise, we got an identifier, is it defined to something? + Result = II->hasMacroDefinition(); + Result.setIsUnsigned(false); // Result is signed intmax_t. + + // If there is a macro, mark it used. 
+ if (Result != 0 && ValueLive) { + MacroInfo *Macro = PP.getMacroInfo(II); + Macro->setIsUsed(true); + } + + // Consume identifier. + PP.LexNonComment(PeekTok); + + // If we are in parens, ensure we have a trailing ). + if (InParens) { + if (PeekTok.isNot(tok::r_paren)) { + PP.Diag(PeekTok, diag::err_pp_missing_rparen); + return true; + } + // Consume the ). + PP.LexNonComment(PeekTok); + } + + // Success, remember that we saw defined(X). + DT.State = DefinedTracker::DefinedMacro; + DT.TheMacro = II; + return false; + } + + switch (PeekTok.getKind()) { + default: // Non-value token. + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + case tok::eom: + case tok::r_paren: + // If there is no expression, report and exit. + PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr); + return true; + case tok::numeric_constant: { + llvm::SmallString<64> IntegerBuffer; + IntegerBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &IntegerBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError) + return true; // a diagnostic was already reported. + + if (Literal.isFloatingLiteral() || Literal.isImaginary) { + PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal); + return true; + } + assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); + + // long long is a C99 feature. + if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x + && Literal.isLongLong) + PP.Diag(PeekTok, diag::ext_longlong); + + // Parse the integer literal into Result. + if (Literal.GetIntegerValue(Result)) { + // Overflow parsing integer literal. + if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large); + Result.setIsUnsigned(true); + } else { + // Set the signedness of the result to match whether there was a U suffix + // or not. 
+ Result.setIsUnsigned(Literal.isUnsigned); + + // Detect overflow based on whether the value is signed. If signed + // and if the value is too large, emit a warning "integer constant is so + // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t + // is 64-bits. + if (!Literal.isUnsigned && Result.isNegative()) { + if (ValueLive)PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed); + Result.setIsUnsigned(true); + } + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::char_constant: { // 'x' + llvm::SmallString<32> CharBuffer; + CharBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &CharBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError()) + return true; // A diagnostic was already emitted. + + // Character literals are always int or wchar_t, expand to intmax_t. + TargetInfo &TI = PP.getTargetInfo(); + unsigned NumBits = TI.getCharWidth(Literal.isWide()); + + // Set the width. + llvm::APSInt Val(NumBits); + // Set the value. + Val = Literal.getValue(); + // Set the signedness. + Val.setIsUnsigned(!TI.isCharSigned()); + + if (Result.getBitWidth() > Val.getBitWidth()) { + Result = Val.extend(Result.getBitWidth()); + } else { + assert(Result.getBitWidth() == Val.getBitWidth() && + "intmax_t smaller than char/wchar_t?"); + Result = Val; + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::l_paren: + PP.LexNonComment(PeekTok); // Eat the (. + // Parse the value and if there are any binary operators involved, parse + // them. + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + + // If this is a silly value like (X), which doesn't need parens, check for + // !(defined X). + if (PeekTok.is(tok::r_paren)) { + // Just use DT unmodified as our result. 
+ } else { + if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP)) + return true; + + if (PeekTok.isNot(tok::r_paren)) { + PP.Diag(PeekTok, diag::err_pp_expected_rparen); + return true; + } + DT.State = DefinedTracker::Unknown; + } + PP.LexNonComment(PeekTok); // Eat the ). + return false; + + case tok::plus: + // Unary plus doesn't modify the value. + PP.LexNonComment(PeekTok); + return EvaluateValue(Result, PeekTok, DT, ValueLive, PP); + case tok::minus: { + SourceLocation Loc = PeekTok.getLocation(); + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p3: The sign of the result matches the sign of the operand. + Result = -Result; + + bool Overflow = false; + if (Result.isUnsigned()) + Overflow = Result.isNegative(); + else if (Result.isMinSignedValue()) + Overflow = true; // -MININT is the only thing that overflows. + + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(Loc, diag::warn_pp_expr_overflow); + + DT.State = DefinedTracker::Unknown; + return false; + } + + case tok::tilde: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p4: The sign of the result matches the sign of the operand. + Result = ~Result; + DT.State = DefinedTracker::Unknown; + return false; + + case tok::exclaim: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + Result = !Result; + // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed. + Result.setIsUnsigned(false); + + if (DT.State == DefinedTracker::DefinedMacro) + DT.State = DefinedTracker::NotDefinedMacro; + else if (DT.State == DefinedTracker::NotDefinedMacro) + DT.State = DefinedTracker::DefinedMacro; + return false; + + // FIXME: Handle #assert + } +} + + + +/// getPrecedence - Return the precedence of the specified binary operator +/// token. This returns: +/// ~0 - Invalid token. 
+/// 14 - *,/,% +/// 13 - -,+ +/// 12 - <<,>> +/// 11 - >=, <=, >, < +/// 10 - ==, != +/// 9 - & +/// 8 - ^ +/// 7 - | +/// 6 - && +/// 5 - || +/// 4 - ? +/// 3 - : +/// 0 - eom, ) +static unsigned getPrecedence(tok::TokenKind Kind) { + switch (Kind) { + default: return ~0U; + case tok::percent: + case tok::slash: + case tok::star: return 14; + case tok::plus: + case tok::minus: return 13; + case tok::lessless: + case tok::greatergreater: return 12; + case tok::lessequal: + case tok::less: + case tok::greaterequal: + case tok::greater: return 11; + case tok::exclaimequal: + case tok::equalequal: return 10; + case tok::amp: return 9; + case tok::caret: return 8; + case tok::pipe: return 7; + case tok::ampamp: return 6; + case tok::pipepipe: return 5; + case tok::question: return 4; + case tok::colon: return 3; + case tok::comma: return 2; + case tok::r_paren: return 0; // Lowest priority, end of expr. + case tok::eom: return 0; // Lowest priority, end of macro. + } +} + + +/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is +/// PeekTok, and whose precedence is PeekPrec. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + Token &PeekTok, bool ValueLive, + Preprocessor &PP) { + unsigned PeekPrec = getPrecedence(PeekTok.getKind()); + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + while (1) { + // If this token has a lower precedence than we are allowed to parse, return + // it so that higher levels of the recursion can parse it. + if (PeekPrec < MinPrec) + return false; + + tok::TokenKind Operator = PeekTok.getKind(); + + // If this is a short-circuiting operator, see if the RHS of the operator is + // dead. 
Note that this cannot just clobber ValueLive. Consider + // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". In + // this example, the RHS of the && being dead does not make the rest of the + // expr dead. + bool RHSIsLive; + if (Operator == tok::ampamp && LHS == 0) + RHSIsLive = false; // RHS of "0 && x" is dead. + else if (Operator == tok::pipepipe && LHS != 0) + RHSIsLive = false; // RHS of "1 || x" is dead. + else if (Operator == tok::question && LHS == 0) + RHSIsLive = false; // RHS (x) of "0 ? x : y" is dead. + else + RHSIsLive = ValueLive; + + // Consume the operator, saving the operator token for error reporting. + Token OpToken = PeekTok; + PP.LexNonComment(PeekTok); + + llvm::APSInt RHS(LHS.getBitWidth()); + // Parse the RHS of the operator. + DefinedTracker DT; + if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true; + + // Remember the precedence of this operator and get the precedence of the + // operator immediately to the right of the RHS. + unsigned ThisPrec = PeekPrec; + PeekPrec = getPrecedence(PeekTok.getKind()); + + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + bool isRightAssoc = Operator == tok::question; + + // Get the precedence of the operator to the right of the RHS. If it binds + // more tightly with RHS than we do, evaluate it completely first. + if (ThisPrec < PeekPrec || + (ThisPrec == PeekPrec && isRightAssoc)) { + if (EvaluateDirectiveSubExpr(RHS, ThisPrec+1, PeekTok, RHSIsLive, PP)) + return true; + PeekPrec = getPrecedence(PeekTok.getKind()); + } + assert(PeekPrec <= ThisPrec && "Recursion didn't work!"); + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. Don't do this for x and y in "x ? y : z". 
+ llvm::APSInt Res(LHS.getBitWidth()); + if (Operator != tok::question) { + Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned()); + // If this just promoted something from signed to unsigned, and if the + // value was negative, warn about it. + if (ValueLive && Res.isUnsigned()) { + if (!LHS.isUnsigned() && LHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_lhs_to_positive, + LHS.toStringSigned() + " to " + LHS.toStringUnsigned()); + if (!RHS.isUnsigned() && RHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_rhs_to_positive, + RHS.toStringSigned() + " to " + RHS.toStringUnsigned()); + } + LHS.setIsUnsigned(Res.isUnsigned()); + RHS.setIsUnsigned(Res.isUnsigned()); + } + + // FIXME: All of these should detect and report overflow?? + bool Overflow = false; + switch (Operator) { + default: assert(0 && "Unknown operator token!"); + case tok::percent: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_remainder_by_zero); + return true; + } + Res = LHS % RHS; + break; + case tok::slash: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_division_by_zero); + return true; + } + Res = LHS / RHS; + if (LHS.isSigned()) + Overflow = LHS.isMinSignedValue() && RHS.isAllOnesValue(); // MININT/-1 + break; + case tok::star: + Res = LHS * RHS; + if (LHS != 0 && RHS != 0) + Overflow = Res/RHS != LHS || Res/LHS != RHS; + break; + case tok::lessless: { + // Determine whether overflow is about to happen. + unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue()); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + else if (LHS.isUnsigned()) + Overflow = ShAmt > LHS.countLeadingZeros(); + else if (LHS.isNonNegative()) + Overflow = ShAmt >= LHS.countLeadingZeros(); // Don't allow sign change. + else + Overflow = ShAmt >= LHS.countLeadingOnes(); + + Res = LHS << ShAmt; + break; + } + case tok::greatergreater: { + // Determine whether overflow is about to happen. 
+ unsigned ShAmt = static_cast<unsigned>(RHS.getLimitedValue()); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + Res = LHS >> ShAmt; + break; + } + case tok::plus: + Res = LHS + RHS; + if (LHS.isUnsigned()) + Overflow = Res.ult(LHS); + else if (LHS.isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()) + Overflow = true; // Overflow for signed addition. + break; + case tok::minus: + Res = LHS - RHS; + if (LHS.isUnsigned()) + Overflow = Res.ugt(LHS); + else if (LHS.isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()) + Overflow = true; // Overflow for signed subtraction. + break; + case tok::lessequal: + Res = LHS <= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::less: + Res = LHS < RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greaterequal: + Res = LHS >= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greater: + Res = LHS > RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::exclaimequal: + Res = LHS != RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::equalequal: + Res = LHS == RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::amp: + Res = LHS & RHS; + break; + case tok::caret: + Res = LHS ^ RHS; + break; + case tok::pipe: + Res = LHS | RHS; + break; + case tok::ampamp: + Res = (LHS != 0 && RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.13p3, result is always int (signed) + break; + case tok::pipepipe: + Res = (LHS != 0 || RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.14p3, result is always int (signed) + break; + case tok::comma: + PP.Diag(OpToken, diag::ext_pp_comma_expr); + Res = RHS; // LHS = LHS,RHS -> RHS. 
+ break; + case tok::question: { + // Parse the : part of the expression. + if (PeekTok.isNot(tok::colon)) { + PP.Diag(OpToken, diag::err_pp_question_without_colon); + return true; + } + // Consume the :. + PP.LexNonComment(PeekTok); + + // Evaluate the value after the :. + bool AfterColonLive = ValueLive && LHS == 0; + llvm::APSInt AfterColonVal(LHS.getBitWidth()); + DefinedTracker DT; + if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP)) + return true; + + // Parse anything after the : RHS that has a higher precedence than ?. + if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec+1, + PeekTok, AfterColonLive, PP)) + return true; + + // Now that we have the condition, the LHS and the RHS of the :, evaluate. + Res = LHS != 0 ? RHS : AfterColonVal; + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. + Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned()); + + // Figure out the precedence of the token after the : part. + PeekPrec = getPrecedence(PeekTok.getKind()); + break; + } + case tok::colon: + // Don't allow :'s to float around without being part of ?: exprs. + PP.Diag(OpToken, diag::err_pp_colon_without_question); + return true; + } + + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(OpToken, diag::warn_pp_expr_overflow); + + // Put the result back into 'LHS' for our next iteration. + LHS = Res; + } + + return false; +} + +/// EvaluateDirectiveExpression - Evaluate an integer constant expression that +/// may occur after a #if or #elif directive. If the expression is equivalent +/// to "!defined(X)" return X in IfNDefMacro. +bool Preprocessor:: +EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { + // Peek ahead one token. + Token Tok; + Lex(Tok); + + // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. 
+ unsigned BitWidth = getTargetInfo().getIntMaxTWidth(); + + llvm::APSInt ResVal(BitWidth); + DefinedTracker DT; + if (EvaluateValue(ResVal, Tok, DT, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.isNot(tok::eom)) + DiscardUntilEndOfDirective(); + return false; + } + + // If we are at the end of the expression after just parsing a value, there + // must be no (unparenthesized) binary operators involved, so we can exit + // directly. + if (Tok.is(tok::eom)) { + // If the expression we parsed was of the form !defined(macro), return the + // macro in IfNDefMacro. + if (DT.State == DefinedTracker::NotDefinedMacro) + IfNDefMacro = DT.TheMacro; + + return ResVal != 0; + } + + // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the + // operator and the stuff after it. + if (EvaluateDirectiveSubExpr(ResVal, 1, Tok, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.isNot(tok::eom)) + DiscardUntilEndOfDirective(); + return false; + } + + // If we aren't at the tok::eom token, something bad happened, like an extra + // ')' token. + if (Tok.isNot(tok::eom)) { + Diag(Tok, diag::err_pp_expected_eol); + DiscardUntilEndOfDirective(); + } + + return ResVal != 0; +} + diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp new file mode 100644 index 00000000000..bd0ff7f94a1 --- /dev/null +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -0,0 +1,401 @@ +//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements pieces of the Preprocessor interface that manage the +// current lexer stack. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +using namespace clang; + +PPCallbacks::~PPCallbacks() { +} + + +//===----------------------------------------------------------------------===// +// Miscellaneous Methods. +//===----------------------------------------------------------------------===// + +/// isInPrimaryFile - Return true if we're in the top-level file, not in a +/// #include. This looks through macro expansions and active _Pragma lexers. +bool Preprocessor::isInPrimaryFile() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) + return IncludeMacroStack.empty(); + + // If there are any stacked lexers, we're in a #include. + assert(IncludeMacroStack[0].TheLexer && + !IncludeMacroStack[0].TheLexer->Is_PragmaLexer && + "Top level include stack isn't our primary lexer?"); + for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i) + if (IncludeMacroStack[i].TheLexer && + !IncludeMacroStack[i].TheLexer->Is_PragmaLexer) + return false; + return true; +} + +/// getCurrentLexer - Return the current file lexer being lexed from. Note +/// that this ignores any potentially active macro expansions and _Pragma +/// expansions going on at the time. +Lexer *Preprocessor::getCurrentFileLexer() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer; + + // Look for a stacked lexer. + for (unsigned i = IncludeMacroStack.size(); i != 0; --i) { + Lexer *L = IncludeMacroStack[i-1].TheLexer; + if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions. + return L; + } + return 0; +} + +/// LookAhead - This peeks ahead N tokens and returns that token without +/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns +/// the token after Tok, etc. 
+/// +/// NOTE: is a relatively expensive method, so it should not be used in common +/// code paths if possible! +/// +Token Preprocessor::LookAhead(unsigned N) { + // FIXME: Optimize the case where multiple lookahead calls are used back to + // back. Consider if the the parser contained (dynamically): + // Lookahead(1); Lookahead(1); Lookahead(1) + // This would return the same token 3 times, but would end up making lots of + // token stream lexers to do it. To handle this common case, see if the top + // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If + // so, see if it has 'N' tokens available in it. If so, just return the + // token. + + // FIXME: Optimize the case when the parser does multiple nearby lookahead + // calls. For example, consider: + // Lookahead(0); Lookahead(1); Lookahead(2); + // The previous optimization won't apply, and there won't be any space left in + // the array that was previously new'd. To handle this, always round up the + // size we new to a multiple of 16 tokens. If the previous buffer has space + // left, we can just grow it. This means we only have to do the new 1/16th as + // often. + + Token *LookaheadTokens = new Token[N]; + + // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token + // to return. + Token Tok; + unsigned NumTokens = 0; + for (; N != ~0U; --N, ++NumTokens) { + Lex(Tok); + LookaheadTokens[NumTokens] = Tok; + + // If we got to EOF, don't lex past it. This will cause LookAhead to return + // the EOF token. + if (Tok.is(tok::eof)) + break; + } + + // Okay, at this point, we have the token we want to return in Tok. However, + // we read it and a bunch of other stuff (in LookaheadTokens) that we must + // allow subsequent calls to 'Lex' to return. To do this, we push a new token + // lexer onto the lexer stack with the tokens we read here. This passes + // ownership of LookaheadTokens to EnterTokenStream. 
+ // + // Note that we disable macro expansion of the tokens from this buffer, since + // any macros have already been expanded, and the internal preprocessor state + // may already read past new macros. Consider something like LookAhead(1) on + // X + // #define X 14 + // Y + // The lookahead call should return 'Y', and the next Lex call should return + // 'X' even though X -> 14 has already been entered as a macro. + // + EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/, + true /*OwnsTokens*/); + return Tok; +} + + +//===----------------------------------------------------------------------===// +// Methods for Entering and Callbacks for leaving various contexts +//===----------------------------------------------------------------------===// + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. Return true +/// on failure. +void Preprocessor::EnterSourceFile(unsigned FileID, + const DirectoryLookup *CurDir) { + assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!"); + ++NumEnteredSourceFiles; + + if (MaxIncludeStackDepth < IncludeMacroStack.size()) + MaxIncludeStackDepth = IncludeMacroStack.size(); + + Lexer *TheLexer = new Lexer(SourceLocation::getFileLoc(FileID, 0), *this); + EnterSourceFileWithLexer(TheLexer, CurDir); +} + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. +void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, + const DirectoryLookup *CurDir) { + + // Add the current lexer to the include stack. + if (CurLexer || CurTokenLexer) + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurTokenLexer)); + + CurLexer = TheLexer; + CurDirLookup = CurDir; + CurTokenLexer = 0; + + // Notify the client, if desired, that we are in a new source file. 
+ if (Callbacks && !CurLexer->Is_PragmaLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(CurLexer->getFileLoc(), + PPCallbacks::EnterFile, FileType); + } +} + + + +/// EnterMacro - Add a Macro to the top of the include stack and start lexing +/// tokens from it instead of the current buffer. +void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) { + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurTokenLexer)); + CurLexer = 0; + CurDirLookup = 0; + + if (NumCachedTokenLexers == 0) { + CurTokenLexer = new TokenLexer(Tok, Args, *this); + } else { + CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers]; + CurTokenLexer->Init(Tok, Args); + } +} + +/// EnterTokenStream - Add a "macro" context to the top of the include stack, +/// which will cause the lexer to start returning the specified tokens. +/// +/// If DisableMacroExpansion is true, tokens lexed from the token stream will +/// not be subject to further macro expansion. Otherwise, these tokens will +/// be re-macro-expanded when/if expansion is enabled. +/// +/// If OwnsTokens is false, this method assumes that the specified stream of +/// tokens has a permanent owner somewhere, so they do not need to be copied. +/// If it is true, it assumes the array of tokens is allocated with new[] and +/// must be freed. +/// +void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, + bool DisableMacroExpansion, + bool OwnsTokens) { + // Save our current state. + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurTokenLexer)); + CurLexer = 0; + CurDirLookup = 0; + + // Create a macro expander to expand from the specified token stream. 
+ if (NumCachedTokenLexers == 0) { + CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion, + OwnsTokens, *this); + } else { + CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers]; + CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + } +} + +/// HandleEndOfFile - This callback is invoked when the lexer hits the end of +/// the current file. This either returns the EOF token or pops a level off +/// the include stack and keeps going. +bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { + assert(!CurTokenLexer && + "Ending a file when currently in a macro!"); + + // See if this file had a controlling macro. + if (CurLexer) { // Not ending a macro, ignore it. + if (const IdentifierInfo *ControllingMacro = + CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) { + // Okay, this has a controlling macro, remember in PerFileInfo. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())) + HeaderInfo.SetFileControllingMacro(FE, ControllingMacro); + } + } + + // If this is a #include'd file, pop it off the include stack and continue + // lexing the #includer file. + if (!IncludeMacroStack.empty()) { + // We're done with the #included file. + RemoveTopOfLexerStack(); + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks && !isEndOfMacro && CurLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr), + PPCallbacks::ExitFile, FileType); + } + + // Client should lex another token. + return false; + } + + // If the file ends with a newline, form the EOF token on the newline itself, + // rather than "on the line following it", which doesn't exist. 
This makes + // diagnostics relating to the end of file include the last file that the user + // actually typed, which is goodness. + const char *EndPos = CurLexer->BufferEnd; + if (EndPos != CurLexer->BufferStart && + (EndPos[-1] == '\n' || EndPos[-1] == '\r')) { + --EndPos; + + // Handle \n\r and \r\n: + if (EndPos != CurLexer->BufferStart && + (EndPos[-1] == '\n' || EndPos[-1] == '\r') && + EndPos[-1] != EndPos[0]) + --EndPos; + } + + Result.startToken(); + CurLexer->BufferPtr = EndPos; + CurLexer->FormTokenWithChars(Result, EndPos); + Result.setKind(tok::eof); + + // We're done with the #included file. + delete CurLexer; + CurLexer = 0; + + // This is the end of the top-level file. If the diag::pp_macro_not_used + // diagnostic is enabled, look for macros that have not been used. + if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){ + for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I = + Macros.begin(), E = Macros.end(); I != E; ++I) { + if (!I->second->isUsed()) + Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used); + } + } + return true; +} + +/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer +/// hits the end of its token stream. +bool Preprocessor::HandleEndOfTokenLexer(Token &Result) { + assert(CurTokenLexer && !CurLexer && + "Ending a macro when currently in a #include file!"); + + // Delete or cache the now-dead macro expander. + if (NumCachedTokenLexers == TokenLexerCacheSize) + delete CurTokenLexer; + else + TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer; + + // Handle this like a #include file being popped off the stack. + CurTokenLexer = 0; + return HandleEndOfFile(Result, true); +} + +/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the +/// lexer stack. This should only be used in situations where the current +/// state of the top-of-stack lexer is unknown. 
+void Preprocessor::RemoveTopOfLexerStack() { + assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load"); + + if (CurTokenLexer) { + // Delete or cache the now-dead macro expander. + if (NumCachedTokenLexers == TokenLexerCacheSize) + delete CurTokenLexer; + else + TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer; + } else { + delete CurLexer; + } + CurLexer = IncludeMacroStack.back().TheLexer; + CurDirLookup = IncludeMacroStack.back().TheDirLookup; + CurTokenLexer = IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); +} + +/// HandleMicrosoftCommentPaste - When the macro expander pastes together a +/// comment (/##/) in microsoft mode, this method handles updating the current +/// state, returning the token on the next source line. +void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { + assert(CurTokenLexer && !CurLexer && + "Pasted comment can only be formed from macro"); + + // We handle this by scanning for the closest real lexer, switching it to + // raw mode and preprocessor mode. This will cause it to return \n as an + // explicit EOM token. + Lexer *FoundLexer = 0; + bool LexerWasInPPMode = false; + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1); + if (ISI.TheLexer == 0) continue; // Scan for a real lexer. + + // Once we find a real lexer, mark it as raw mode (disabling macro + // expansions) and preprocessor mode (return EOM). We know that the lexer + // was *not* in raw mode before, because the macro that the comment came + // from was expanded. However, it could have already been in preprocessor + // mode (#if COMMENT) in which case we have to return it to that mode and + // return EOM. 
+ FoundLexer = ISI.TheLexer; + FoundLexer->LexingRawMode = true; + LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective; + FoundLexer->ParsingPreprocessorDirective = true; + break; + } + + // Okay, we either found and switched over the lexer, or we didn't find a + // lexer. In either case, finish off the macro the comment came from, getting + // the next token. + if (!HandleEndOfTokenLexer(Tok)) Lex(Tok); + + // Discarding comments as long as we don't have EOF or EOM. This 'comments + // out' the rest of the line, including any tokens that came from other macros + // that were active, as in: + // #define submacro a COMMENT b + // submacro c + // which should lex to 'a' only: 'b' and 'c' should be removed. + while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof)) + Lex(Tok); + + // If we got an eom token, then we successfully found the end of the line. + if (Tok.is(tok::eom)) { + assert(FoundLexer && "Can't get end of line without an active lexer"); + // Restore the lexer back to normal mode instead of raw mode. + FoundLexer->LexingRawMode = false; + + // If the lexer was already in preprocessor mode, just return the EOM token + // to finish the preprocessor line. + if (LexerWasInPPMode) return; + + // Otherwise, switch out of PP mode and return the next lexed token. + FoundLexer->ParsingPreprocessorDirective = false; + return Lex(Tok); + } + + // If we got an EOF token, then we reached the end of the token stream but + // didn't find an explicit \n. This can only happen if there was no lexer + // active (an active lexer would return EOM at EOF if there was no \n in + // preprocessor directive mode), so just return EOF as our token. 
+ assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode"); +} diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp new file mode 100644 index 00000000000..8218d0ac06e --- /dev/null +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -0,0 +1,523 @@ +//===--- MacroExpansion.cpp - Top level Macro Expansion -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the top level handling of macro expasion for the +// preprocessor. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "MacroArgs.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/Diagnostic.h" +using namespace clang; + +/// setMacroInfo - Specify a macro for this identifier. +/// +void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) { + if (MI == 0) { + if (II->hasMacroDefinition()) { + Macros.erase(II); + II->setHasMacroDefinition(false); + } + } else { + Macros[II] = MI; + II->setHasMacroDefinition(true); + } +} + +/// RegisterBuiltinMacro - Register the specified identifier in the identifier +/// table and mark it as a builtin macro to be expanded. +IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) { + // Get the identifier. + IdentifierInfo *Id = getIdentifierInfo(Name); + + // Mark it as being a macro that is builtin. + MacroInfo *MI = new MacroInfo(SourceLocation()); + MI->setIsBuiltinMacro(); + setMacroInfo(Id, MI); + return Id; +} + + +/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the +/// identifier table. 
+void Preprocessor::RegisterBuiltinMacros() { + Ident__LINE__ = RegisterBuiltinMacro("__LINE__"); + Ident__FILE__ = RegisterBuiltinMacro("__FILE__"); + Ident__DATE__ = RegisterBuiltinMacro("__DATE__"); + Ident__TIME__ = RegisterBuiltinMacro("__TIME__"); + Ident_Pragma = RegisterBuiltinMacro("_Pragma"); + + // GCC Extensions. + Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__"); + Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__"); + Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__"); +} + +/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token +/// in its expansion, currently expands to that token literally. +static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, + const IdentifierInfo *MacroIdent, + Preprocessor &PP) { + IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo(); + + // If the token isn't an identifier, it's always literally expanded. + if (II == 0) return true; + + // If the identifier is a macro, and if that macro is enabled, it may be + // expanded so it's not a trivial expansion. + if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() && + // Fast expanding "#define X X" is ok, because X would be disabled. + II != MacroIdent) + return false; + + // If this is an object-like macro invocation, it is safe to trivially expand + // it. + if (MI->isObjectLike()) return true; + + // If this is a function-like macro invocation, it's safe to trivially expand + // as long as the identifier is not a macro argument. + for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end(); + I != E; ++I) + if (*I == II) + return false; // Identifier is a macro argument. + + return true; +} + + +/// isNextPPTokenLParen - Determine whether the next preprocessor token to be +/// lexed is a '('. If so, consume the token and return true, if not, this +/// method should have no observable side-effect on the lexed tokens. 
+bool Preprocessor::isNextPPTokenLParen() { + // Do some quick tests for rejection cases. + unsigned Val; + if (CurLexer) + Val = CurLexer->isNextPPTokenLParen(); + else + Val = CurTokenLexer->isNextTokenLParen(); + + if (Val == 2) { + // We have run off the end. If it's a source file we don't + // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the + // macro stack. + if (CurLexer) + return false; + for (unsigned i = IncludeMacroStack.size(); i != 0; --i) { + IncludeStackInfo &Entry = IncludeMacroStack[i-1]; + if (Entry.TheLexer) + Val = Entry.TheLexer->isNextPPTokenLParen(); + else + Val = Entry.TheTokenLexer->isNextTokenLParen(); + + if (Val != 2) + break; + + // Ran off the end of a source file? + if (Entry.TheLexer) + return false; + } + } + + // Okay, if we know that the token is a '(', lex it and return. Otherwise we + // have found something that isn't a '(' or we found the end of the + // translation unit. In either case, return false. + if (Val != 1) + return false; + + Token Tok; + LexUnexpandedToken(Tok); + assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?"); + return true; +} + +/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be +/// expanded as a macro, handle it and return the next token as 'Identifier'. +bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, + MacroInfo *MI) { + // If this is a macro exapnsion in the "#if !defined(x)" line for the file, + // then the macro could expand to different things in other contexts, we need + // to disable the optimization in this case. + if (CurLexer) CurLexer->MIOpt.ExpandedMacro(); + + // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. + if (MI->isBuiltinMacro()) { + ExpandBuiltinMacro(Identifier); + return false; + } + + /// Args - If this is a function-like macro expansion, this contains, + /// for each macro argument, the list of tokens that were provided to the + /// invocation. 
+ MacroArgs *Args = 0; + + // If this is a function-like macro, read the arguments. + if (MI->isFunctionLike()) { + // C99 6.10.3p10: If the preprocessing token immediately after the the macro + // name isn't a '(', this macro should not be expanded. Otherwise, consume + // it. + if (!isNextPPTokenLParen()) + return true; + + // Remember that we are now parsing the arguments to a macro invocation. + // Preprocessor directives used inside macro arguments are not portable, and + // this enables the warning. + InMacroArgs = true; + Args = ReadFunctionLikeMacroArgs(Identifier, MI); + + // Finished parsing args. + InMacroArgs = false; + + // If there was an error parsing the arguments, bail out. + if (Args == 0) return false; + + ++NumFnMacroExpanded; + } else { + ++NumMacroExpanded; + } + + // Notice that this macro has been used. + MI->setIsUsed(true); + + // If we started lexing a macro, enter the macro expansion body. + + // If this macro expands to no tokens, don't bother to push it onto the + // expansion stack, only to take it right back off. + if (MI->getNumTokens() == 0) { + // No need for arg info. + if (Args) Args->destroy(); + + // Ignore this macro use, just return the next token in the current + // buffer. + bool HadLeadingSpace = Identifier.hasLeadingSpace(); + bool IsAtStartOfLine = Identifier.isAtStartOfLine(); + + Lex(Identifier); + + // If the identifier isn't on some OTHER line, inherit the leading + // whitespace/first-on-a-line property of this token. This handles + // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is + // empty. 
+ if (!Identifier.isAtStartOfLine()) { + if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine); + if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace); + } + ++NumFastMacroExpanded; + return false; + + } else if (MI->getNumTokens() == 1 && + isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(), + *this)){ + // Otherwise, if this macro expands into a single trivially-expanded + // token: expand it now. This handles common cases like + // "#define VAL 42". + + // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro + // identifier to the expanded token. + bool isAtStartOfLine = Identifier.isAtStartOfLine(); + bool hasLeadingSpace = Identifier.hasLeadingSpace(); + + // Remember where the token is instantiated. + SourceLocation InstantiateLoc = Identifier.getLocation(); + + // Replace the result token. + Identifier = MI->getReplacementToken(0); + + // Restore the StartOfLine/LeadingSpace markers. + Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine); + Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace); + + // Update the tokens location to include both its logical and physical + // locations. + SourceLocation Loc = + SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc); + Identifier.setLocation(Loc); + + // If this is #define X X, we must mark the result as unexpandible. + if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) + if (getMacroInfo(NewII) == MI) + Identifier.setFlag(Token::DisableExpand); + + // Since this is not an identifier token, it can't be macro expanded, so + // we're done. + ++NumFastMacroExpanded; + return false; + } + + // Start expanding the macro. + EnterMacro(Identifier, Args); + + // Now that the macro is at the top of the include stack, ask the + // preprocessor to read the next token from it. 
+ Lex(Identifier); + return false; +} + +/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is +/// invoked to read all of the actual arguments specified for the macro +/// invocation. This returns null on error. +MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, + MacroInfo *MI) { + // The number of fixed arguments to parse. + unsigned NumFixedArgsLeft = MI->getNumArgs(); + bool isVariadic = MI->isVariadic(); + + // Outer loop, while there are more arguments, keep reading them. + Token Tok; + Tok.setKind(tok::comma); + --NumFixedArgsLeft; // Start reading the first arg. + + // ArgTokens - Build up a list of tokens that make up each argument. Each + // argument is separated by an EOF token. Use a SmallVector so we can avoid + // heap allocations in the common case. + llvm::SmallVector<Token, 64> ArgTokens; + + unsigned NumActuals = 0; + while (Tok.is(tok::comma)) { + // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note + // that we already consumed the first one. + unsigned NumParens = 0; + + while (1) { + // Read arguments as unexpanded tokens. This avoids issues, e.g., where + // an argument value in a macro could expand to ',' or '(' or ')'. + LexUnexpandedToken(Tok); + + if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n" + Diag(MacroName, diag::err_unterm_macro_invoc); + // Do not lose the EOF/EOM. Return it to the client. + MacroName = Tok; + return 0; + } else if (Tok.is(tok::r_paren)) { + // If we found the ) token, the macro arg list is done. + if (NumParens-- == 0) + break; + } else if (Tok.is(tok::l_paren)) { + ++NumParens; + } else if (Tok.is(tok::comma) && NumParens == 0) { + // Comma ends this argument if there are more fixed arguments expected. + if (NumFixedArgsLeft) + break; + + // If this is not a variadic macro, too many args were specified. + if (!isVariadic) { + // Emit the diagnostic at the macro name in case there is a missing ). 
+ // Emitting it at the , could be far away from the macro name. + Diag(MacroName, diag::err_too_many_args_in_macro_invoc); + return 0; + } + // Otherwise, continue to add the tokens to this variable argument. + } else if (Tok.is(tok::comment) && !KeepMacroComments) { + // If this is a comment token in the argument list and we're just in + // -C mode (not -CC mode), discard the comment. + continue; + } else if (Tok.is(tok::identifier)) { + // Reading macro arguments can cause macros that we are currently + // expanding from to be popped off the expansion stack. Doing so causes + // them to be reenabled for expansion. Here we record whether any + // identifiers we lex as macro arguments correspond to disabled macros. + // If so, we mark the token as noexpand. This is a subtle aspect of + // C99 6.10.3.4p2. + if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo())) + if (!MI->isEnabled()) + Tok.setFlag(Token::DisableExpand); + } + + ArgTokens.push_back(Tok); + } + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + + // Add a marker EOF token to the end of the token list for this argument. + Token EOFTok; + EOFTok.startToken(); + EOFTok.setKind(tok::eof); + EOFTok.setLocation(Tok.getLocation()); + EOFTok.setLength(0); + ArgTokens.push_back(EOFTok); + ++NumActuals; + --NumFixedArgsLeft; + }; + + // Okay, we either found the r_paren. Check to see if we parsed too few + // arguments. + unsigned MinArgsExpected = MI->getNumArgs(); + + // See MacroArgs instance var for description of this. + bool isVarargsElided = false; + + if (NumActuals < MinArgsExpected) { + // There are several cases where too few arguments is ok, handle them now. + if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) { + // Varargs where the named vararg parameter is missing: ok as extension. + // #define A(x, ...) 
+ // A("blah") + Diag(Tok, diag::ext_missing_varargs_arg); + + // Remember this occurred if this is a C99 macro invocation with at least + // one actual argument. + isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1; + } else if (MI->getNumArgs() == 1) { + // #define A(x) + // A() + // is ok because it is an empty argument. + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + } else { + // Otherwise, emit the error. + Diag(Tok, diag::err_too_few_args_in_macro_invoc); + return 0; + } + + // Add a marker EOF token to the end of the token list for this argument. + SourceLocation EndLoc = Tok.getLocation(); + Tok.startToken(); + Tok.setKind(tok::eof); + Tok.setLocation(EndLoc); + Tok.setLength(0); + ArgTokens.push_back(Tok); + } + + return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided); +} + +/// ComputeDATE_TIME - Compute the current time, enter it into the specified +/// scratch buffer, then return DATELoc/TIMELoc locations with the position of +/// the identifier tokens inserted. +static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, + Preprocessor &PP) { + time_t TT = time(0); + struct tm *TM = localtime(&TT); + + static const char * const Months[] = { + "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" + }; + + char TmpBuffer[100]; + sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, + TM->tm_year+1900); + DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + + sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); + TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); +} + +/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded +/// as a builtin macro, handle it and return the next token as 'Tok'. 
+void Preprocessor::ExpandBuiltinMacro(Token &Tok) { + // Figure out which token this is. + IdentifierInfo *II = Tok.getIdentifierInfo(); + assert(II && "Can't be a macro without id info!"); + + // If this is an _Pragma directive, expand it, invoke the pragma handler, then + // lex the token after it. + if (II == Ident_Pragma) + return Handle_Pragma(Tok); + + ++NumBuiltinMacroExpanded; + + char TmpBuffer[100]; + + // Set up the return result. + Tok.setIdentifierInfo(0); + Tok.clearFlag(Token::NeedsCleaning); + + if (II == Ident__LINE__) { + // __LINE__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation())); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + SourceLocation Loc = Tok.getLocation(); + if (II == Ident__BASE_FILE__) { + Diag(Tok, diag::ext_pp_base_file); + SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc); + while (NextLoc.isValid()) { + Loc = NextLoc; + NextLoc = SourceMgr.getIncludeLoc(Loc); + } + } + + // Escape this filename. 
Turn '\' -> '\\' '"' -> '\"' + std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc)); + FN = '"' + Lexer::Stringify(FN) + '"'; + Tok.setKind(tok::string_literal); + Tok.setLength(FN.size()); + Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation())); + } else if (II == Ident__DATE__) { + if (!DATELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"Mmm dd yyyy\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation())); + } else if (II == Ident__TIME__) { + if (!TIMELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"hh:mm:ss\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation())); + } else if (II == Ident__INCLUDE_LEVEL__) { + Diag(Tok, diag::ext_pp_include_level); + + // Compute the include depth of this token. + unsigned Depth = 0; + SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation()); + for (; Loc.isValid(); ++Depth) + Loc = SourceMgr.getIncludeLoc(Loc); + + // __INCLUDE_LEVEL__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", Depth); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__TIMESTAMP__) { + // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be + // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. + Diag(Tok, diag::ext_pp_timestamp); + + // Get the file that we are lexing out of. If we're currently lexing from + // a macro, dig into the include stack. + const FileEntry *CurFile = 0; + Lexer *TheLexer = getCurrentFileLexer(); + + if (TheLexer) + CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc()); + + // If this file is older than the file it depends on, emit a diagnostic. 
+ const char *Result; + if (CurFile) { + time_t TT = CurFile->getModificationTime(); + struct tm *TM = localtime(&TT); + Result = asctime(TM); + } else { + Result = "??? ??? ?? ??:??:?? ????\n"; + } + TmpBuffer[0] = '"'; + strcpy(TmpBuffer+1, Result); + unsigned Len = strlen(TmpBuffer); + TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. + Tok.setKind(tok::string_literal); + Tok.setLength(Len); + Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation())); + } else { + assert(0 && "Unknown identifier!"); + } +} diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp new file mode 100644 index 00000000000..08ad1cf1d2f --- /dev/null +++ b/clang/lib/Lex/Pragma.cpp @@ -0,0 +1,386 @@ +//===--- Pragma.cpp - Pragma registration and handling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PragmaHandler/PragmaTable interfaces and implements +// pragma related methods of the Preprocessor class. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Pragma.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + +// Out-of-line destructor to provide a home for the class. +PragmaHandler::~PragmaHandler() { +} + +//===----------------------------------------------------------------------===// +// PragmaNamespace Implementation. 
+//===----------------------------------------------------------------------===// + + +PragmaNamespace::~PragmaNamespace() { + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) + delete Handlers[i]; +} + +/// FindHandler - Check to see if there is already a handler for the +/// specified name. If not, return the handler for the null identifier if it +/// exists, otherwise return null. If IgnoreNull is true (the default) then +/// the null handler isn't returned on failure to match. +PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name, + bool IgnoreNull) const { + PragmaHandler *NullHandler = 0; + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) { + if (Handlers[i]->getName() == Name) + return Handlers[i]; + + if (Handlers[i]->getName() == 0) + NullHandler = Handlers[i]; + } + return IgnoreNull ? 0 : NullHandler; +} + +void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) { + // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro + // expand it, the user can have a STDC #define, that should not affect this. + PP.LexUnexpandedToken(Tok); + + // Get the handler for this token. If there is no handler, ignore the pragma. + PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false); + if (Handler == 0) return; + + // Otherwise, pass it down. + Handler->HandlePragma(PP, Tok); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Pragma Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandlePragmaDirective - The "#pragma" directive has been parsed. Lex the +/// rest of the pragma, passing it to the registered pragma handlers. +void Preprocessor::HandlePragmaDirective() { + ++NumPragma; + + // Invoke the first level of pragma handlers which reads the namespace id. + Token Tok; + PragmaHandlers->HandlePragma(*this, Tok); + + // If the pragma handler didn't read the rest of the line, consume it now. 
+ if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); +} + +/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then +/// return the first token after the directive. The _Pragma token has just +/// been read into 'Tok'. +void Preprocessor::Handle_Pragma(Token &Tok) { + // Remember the pragma token location. + SourceLocation PragmaLoc = Tok.getLocation(); + + // Read the '('. + Lex(Tok); + if (Tok.isNot(tok::l_paren)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Read the '"..."'. + Lex(Tok); + if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Remember the string. + std::string StrVal = getSpelling(Tok); + SourceLocation StrLoc = Tok.getLocation(); + + // Read the ')'. + Lex(Tok); + if (Tok.isNot(tok::r_paren)) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1. + if (StrVal[0] == 'L') // Remove L prefix. + StrVal.erase(StrVal.begin()); + assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && + "Invalid string token!"); + + // Remove the front quote, replacing it with a space, so that the pragma + // contents appear to have a space before them. + StrVal[0] = ' '; + + // Replace the terminating quote with a \n\0. + StrVal[StrVal.size()-1] = '\n'; + StrVal += '\0'; + + // Remove escaped quotes and escapes. + for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { + if (StrVal[i] == '\\' && + (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) { + // \\ -> '\' and \" -> '"'. + StrVal.erase(StrVal.begin()+i); + --e; + } + } + + // Plop the string (including the newline and trailing null) into a buffer + // where we can lex it. 
+ SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size(), StrLoc); + const char *StrData = SourceMgr.getCharacterData(TokLoc); + + // Make and enter a lexer object so that we lex and expand the tokens just + // like any others. + Lexer *TL = new Lexer(TokLoc, *this, + StrData, StrData+StrVal.size()-1 /* no null */); + + // Ensure that the lexer thinks it is inside a directive, so that end \n will + // return an EOM token. + TL->ParsingPreprocessorDirective = true; + + // This lexer really is for _Pragma. + TL->Is_PragmaLexer = true; + + EnterSourceFileWithLexer(TL, 0); + + // With everything set up, lex this as a #pragma directive. + HandlePragmaDirective(); + + // Finally, return whatever came after the pragma directive. + return Lex(Tok); +} + + + +/// HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'. +/// +void Preprocessor::HandlePragmaOnce(Token &OnceTok) { + if (isInPrimaryFile()) { + Diag(OnceTok, diag::pp_pragma_once_in_main_file); + return; + } + + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. + SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc(); + + // Mark the file as a once-only file now. + HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForLoc(FileLoc)); +} + +void Preprocessor::HandlePragmaMark() { + assert(CurLexer && "No current lexer?"); + CurLexer->ReadToEndOfLine(); +} + + +/// HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'. +/// +void Preprocessor::HandlePragmaPoison(Token &PoisonTok) { + Token Tok; + + while (1) { + // Read the next token to poison. While doing this, pretend that we are + // skipping while reading the identifier to poison. + // This avoids errors on code like: + // #pragma GCC poison X + // #pragma GCC poison X + if (CurLexer) CurLexer->LexingRawMode = true; + LexUnexpandedToken(Tok); + if (CurLexer) CurLexer->LexingRawMode = false; + + // If we reached the end of line, we're done. 
+ if (Tok.is(tok::eom)) return; + + // Can only poison identifiers. + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_pp_invalid_poison); + return; + } + + // Look up the identifier info for the token. We disabled identifier lookup + // by saying we're skipping contents, so we need to do this manually. + IdentifierInfo *II = LookUpIdentifierInfo(Tok); + + // Already poisoned. + if (II->isPoisoned()) continue; + + // If this is a macro identifier, emit a warning. + if (II->hasMacroDefinition()) + Diag(Tok, diag::pp_poisoning_existing_macro); + + // Finally, poison it! + II->setIsPoisoned(); + } +} + +/// HandlePragmaSystemHeader - Implement #pragma GCC system_header. We know +/// that the whole directive has been parsed. +void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { + if (isInPrimaryFile()) { + Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file); + return; + } + + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. + Lexer *TheLexer = getCurrentFileLexer(); + + // Mark the file as a system header. + const FileEntry *File = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc()); + HeaderInfo.MarkFileSystemHeader(File); + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks) + Callbacks->FileChanged(TheLexer->getSourceLocation(TheLexer->BufferPtr), + PPCallbacks::SystemHeaderPragma, + DirectoryLookup::SystemHeaderDir); +} + +/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah. +/// +void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { + Token FilenameTok; + CurLexer->LexIncludeFilename(FilenameTok); + + // If the token kind is EOM, the error has already been diagnosed. + if (FilenameTok.is(tok::eom)) + return; + + // Reserve a buffer to get the spelling. 
+ llvm::SmallVector<char, 128> FilenameBuffer; + FilenameBuffer.resize(FilenameTok.getLength()); + + const char *FilenameStart = &FilenameBuffer[0]; + unsigned Len = getSpelling(FilenameTok, FilenameStart); + const char *FilenameEnd = FilenameStart+Len; + bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), + FilenameStart, FilenameEnd); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (FilenameStart == 0) + return; + + // Search include directories for this file. + const DirectoryLookup *CurDir; + const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, + isAngled, 0, CurDir); + if (File == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc(); + const FileEntry *CurFile = SourceMgr.getFileEntryForLoc(FileLoc); + + // If this file is older than the file it depends on, emit a diagnostic. + if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) { + // Lex tokens at the end of the message and include them in the message. + std::string Message; + Lex(DependencyTok); + while (DependencyTok.isNot(tok::eom)) { + Message += getSpelling(DependencyTok) + " "; + Lex(DependencyTok); + } + + Message.erase(Message.end()-1); + Diag(FilenameTok, diag::pp_out_of_date_dependency, Message); + } +} + + +/// AddPragmaHandler - Add the specified pragma handler to the preprocessor. +/// If 'Namespace' is non-null, then it is a token required to exist on the +/// pragma line before the pragma string starts, e.g. "STDC" or "GCC". +void Preprocessor::AddPragmaHandler(const char *Namespace, + PragmaHandler *Handler) { + PragmaNamespace *InsertNS = PragmaHandlers; + + // If this is specified to be in a namespace, step down into it. 
+ if (Namespace) { + IdentifierInfo *NSID = getIdentifierInfo(Namespace); + + // If there is already a pragma handler with the name of this namespace, + // we either have an error (directive with the same name as a namespace) or + // we already have the namespace to insert into. + if (PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID)) { + InsertNS = Existing->getIfNamespace(); + assert(InsertNS != 0 && "Cannot have a pragma namespace and pragma" + " handler with the same name!"); + } else { + // Otherwise, this namespace doesn't exist yet, create and insert the + // handler for it. + InsertNS = new PragmaNamespace(NSID); + PragmaHandlers->AddPragma(InsertNS); + } + } + + // Check to make sure we don't already have a pragma for this identifier. + assert(!InsertNS->FindHandler(Handler->getName()) && + "Pragma handler already exists for this identifier!"); + InsertNS->AddPragma(Handler); +} + +namespace { +/// PragmaOnceHandler - "#pragma once" marks the file as atomically included. +struct PragmaOnceHandler : public PragmaHandler { + PragmaOnceHandler(const IdentifierInfo *OnceID) : PragmaHandler(OnceID) {} + virtual void HandlePragma(Preprocessor &PP, Token &OnceTok) { + PP.CheckEndOfDirective("#pragma once"); + PP.HandlePragmaOnce(OnceTok); + } +}; + +/// PragmaMarkHandler - "#pragma mark ..." is ignored by the compiler, and the +/// rest of the line is not lexed. +struct PragmaMarkHandler : public PragmaHandler { + PragmaMarkHandler(const IdentifierInfo *MarkID) : PragmaHandler(MarkID) {} + virtual void HandlePragma(Preprocessor &PP, Token &MarkTok) { + PP.HandlePragmaMark(); + } +}; + +/// PragmaPoisonHandler - "#pragma poison x" marks x as not usable. 
+struct PragmaPoisonHandler : public PragmaHandler { + PragmaPoisonHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, Token &PoisonTok) { + PP.HandlePragmaPoison(PoisonTok); + } +}; + +/// PragmaSystemHeaderHandler - "#pragma system_header" marks the current file +/// as a system header, which silences warnings in it. +struct PragmaSystemHeaderHandler : public PragmaHandler { + PragmaSystemHeaderHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, Token &SHToken) { + PP.HandlePragmaSystemHeader(SHToken); + PP.CheckEndOfDirective("#pragma"); + } +}; +struct PragmaDependencyHandler : public PragmaHandler { + PragmaDependencyHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, Token &DepToken) { + PP.HandlePragmaDependency(DepToken); + } +}; +} // end anonymous namespace + + +/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas: +/// #pragma GCC poison/system_header/dependency and #pragma once. +void Preprocessor::RegisterBuiltinPragmas() { + AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once"))); + AddPragmaHandler(0, new PragmaMarkHandler(getIdentifierInfo("mark"))); + AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison"))); + AddPragmaHandler("GCC", new PragmaSystemHeaderHandler( + getIdentifierInfo("system_header"))); + AddPragmaHandler("GCC", new PragmaDependencyHandler( + getIdentifierInfo("dependency"))); +} diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp new file mode 100644 index 00000000000..86156a07728 --- /dev/null +++ b/clang/lib/Lex/Preprocessor.cpp @@ -0,0 +1,560 @@ +//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor interface. +// +//===----------------------------------------------------------------------===// +// +// Options to support: +// -H - Print the name of each header file used. +// -d[MDNI] - Dump various things. +// -fworking-directory - #line's with preprocessor's working dir. +// -fpreprocessed +// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD +// -W* +// -w +// +// Messages to emit: +// "Multiple include guards may be useful for:\n" +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Streams.h" +#include <ctime> +using namespace clang; + +//===----------------------------------------------------------------------===// + +Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, + TargetInfo &target, SourceManager &SM, + HeaderSearch &Headers) + : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()), + SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts), + CurLexer(0), CurDirLookup(0), CurTokenLexer(0), Callbacks(0) { + ScratchBuf = new ScratchBuffer(SourceMgr); + + // Clear stats. + NumDirectives = NumDefined = NumUndefined = NumPragma = 0; + NumIf = NumElse = NumEndif = 0; + NumEnteredSourceFiles = 0; + NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; + NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; + MaxIncludeStackDepth = 0; + NumSkipped = 0; + + // Default to discarding comments. 
+ KeepComments = false; + KeepMacroComments = false; + + // Macro expansion is enabled. + DisableMacroExpansion = false; + InMacroArgs = false; + NumCachedTokenLexers = 0; + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + + Predefines = 0; + + // Initialize the pragma handlers. + PragmaHandlers = new PragmaNamespace(0); + RegisterBuiltinPragmas(); + + // Initialize builtin macros like __LINE__ and friends. + RegisterBuiltinMacros(); +} + +Preprocessor::~Preprocessor() { + // Free any active lexers. + delete CurLexer; + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); + } + + // Free any macro definitions. + for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I = + Macros.begin(), E = Macros.end(); I != E; ++I) { + // Free the macro definition. + delete I->second; + I->second = 0; + I->first->setHasMacroDefinition(false); + } + + // Free any cached macro expanders. + for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) + delete TokenLexerCache[i]; + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; + + delete Callbacks; +} + +/// Diag - Forwarding function for diagnostics. This emits a diagnostic at +/// the specified Token's location, translating the token's start +/// position in the current buffer into a SourcePosition object for rendering. 
+void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) { + Diags.Report(getFullLoc(Loc), DiagID); +} + +void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) { + Diags.Report(getFullLoc(Loc), DiagID, &Msg, 1); +} + +void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { + llvm::cerr << tok::getTokenName(Tok.getKind()) << " '" + << getSpelling(Tok) << "'"; + + if (!DumpFlags) return; + + llvm::cerr << "\t"; + if (Tok.isAtStartOfLine()) + llvm::cerr << " [StartOfLine]"; + if (Tok.hasLeadingSpace()) + llvm::cerr << " [LeadingSpace]"; + if (Tok.isExpandDisabled()) + llvm::cerr << " [ExpandDisabled]"; + if (Tok.needsCleaning()) { + const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); + llvm::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength()) + << "']"; + } + + llvm::cerr << "\tLoc=<"; + DumpLocation(Tok.getLocation()); + llvm::cerr << ">"; +} + +void Preprocessor::DumpLocation(SourceLocation Loc) const { + SourceLocation LogLoc = SourceMgr.getLogicalLoc(Loc); + llvm::cerr << SourceMgr.getSourceName(LogLoc) << ':' + << SourceMgr.getLineNumber(LogLoc) << ':' + << SourceMgr.getLineNumber(LogLoc); + + SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(Loc); + if (PhysLoc != LogLoc) { + llvm::cerr << " <PhysLoc="; + DumpLocation(PhysLoc); + llvm::cerr << ">"; + } +} + +void Preprocessor::DumpMacro(const MacroInfo &MI) const { + llvm::cerr << "MACRO: "; + for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { + DumpToken(MI.getReplacementToken(i)); + llvm::cerr << " "; + } + llvm::cerr << "\n"; +} + +void Preprocessor::PrintStats() { + llvm::cerr << "\n*** Preprocessor Stats:\n"; + llvm::cerr << NumDirectives << " directives found:\n"; + llvm::cerr << " " << NumDefined << " #define.\n"; + llvm::cerr << " " << NumUndefined << " #undef.\n"; + llvm::cerr << " #include/#include_next/#import:\n"; + llvm::cerr << " " << NumEnteredSourceFiles << " source files entered.\n"; + llvm::cerr << " " << 
MaxIncludeStackDepth << " max include stack depth\n"; + llvm::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n"; + llvm::cerr << " " << NumElse << " #else/#elif.\n"; + llvm::cerr << " " << NumEndif << " #endif.\n"; + llvm::cerr << " " << NumPragma << " #pragma.\n"; + llvm::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; + + llvm::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" + << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " + << NumFastMacroExpanded << " on the fast path.\n"; + llvm::cerr << (NumFastTokenPaste+NumTokenPaste) + << " token paste (##) operations performed, " + << NumFastTokenPaste << " on the fast path.\n"; +} + +//===----------------------------------------------------------------------===// +// Token Spelling +//===----------------------------------------------------------------------===// + + +/// getSpelling() - Return the 'spelling' of this token. The spelling of a +/// token are the characters used to represent the token in the source file +/// after trigraph expansion and escaped-newline folding. In particular, this +/// wants to get the true, uncanonicalized, spelling of things like digraphs +/// UCNs, etc. +std::string Preprocessor::getSpelling(const Token &Tok) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token contains nothing interesting, return it directly. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. 
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Preprocessor::getSpelling(const Token &Tok, + const char *&Buffer) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getName(); + + // Return the length of the token. If the token needed cleaning, don't + // include the size of the newlines or trigraphs in it. + if (!Tok.needsCleaning()) + return Tok.getLength(); + else + return strlen(Buffer); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + + // If this token contains nothing interesting, return it directly. + if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + // Otherwise, hard case, relex the characters into the string. 
+ char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + +/// CreateString - Plop the specified string into a scratch buffer and return a +/// location for it. If specified, the source location provides a source +/// location for the token. +SourceLocation Preprocessor:: +CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { + if (SLoc.isValid()) + return ScratchBuf->getToken(Buf, Len, SLoc); + return ScratchBuf->getToken(Buf, Len); +} + + +/// AdvanceToTokenCharacter - Given a location that specifies the start of a +/// token, return a new location that specifies a character within the token. +SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned CharNo) { + // If they request the first char of the token, we're trivially done. If this + // is a macro expansion, it doesn't make sense to point to a character within + // the instantiation point (the name). We could point to the source + // character, but without also pointing to instantiation info, this is + // confusing. + if (CharNo == 0 || TokStart.isMacroID()) return TokStart; + + // Figure out how many physical characters away the specified logical + // character is. This needs to take into consideration newlines and + // trigraphs. + const char *TokPtr = SourceMgr.getCharacterData(TokStart); + unsigned PhysOffset = 0; + + // The usual case is that tokens don't contain anything interesting. Skip + // over the uninteresting characters. If a token only consists of simple + // chars, this method is extremely fast. 
// Append one directive line for Macro to Buf.  Macro is either "XXX", which
// produces "<Command>XXX 1", or "XXX=Y z W", which produces
// "<Command>XXX Y z W" (only the first '=' is treated as the separator).
// "XXX=" gives a definition with an empty value.  Command defaults to
// "#define " and the line is terminated with '\n'.
static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro,
                               const char *Command = "#define ") {
  Buf.insert(Buf.end(), Command, Command+strlen(Command));

  const char *const MacroEnd = Macro+strlen(Macro);
  const char *const Separator = strchr(Macro, '=');

  // The name runs up to the '=' if there is one, else to the end of the
  // string.
  Buf.insert(Buf.end(), Macro, Separator ? Separator : MacroEnd);
  Buf.push_back(' ');

  if (Separator)
    Buf.insert(Buf.end(), Separator+1, MacroEnd);  // Explicit value.
  else
    Buf.push_back('1');                            // Implicit value of 1.

  Buf.push_back('\n');
}
+ However, if (a) we are in a system header, (b) the option + stdc_0_in_system_headers is true (set by target config), and + (c) we are not in strictly conforming mode, then it has the + value 0. (b) and (c) are already checked in cpp_init_builtins. */ + //case BT_STDC: + if (cpp_in_system_header (pfile)) + number = 0; + else + number = 1; + break; +#endif + // These should all be defined in the preprocessor according to the + // current language configuration. + DefineBuiltinMacro(Buf, "__STDC__=1"); + //DefineBuiltinMacro(Buf, "__ASSEMBLER__=1"); + if (PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus) + DefineBuiltinMacro(Buf, "__STDC_VERSION__=199901L"); + else if (0) // STDC94 ? + DefineBuiltinMacro(Buf, "__STDC_VERSION__=199409L"); + + DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1"); + if (PP.getLangOptions().ObjC1) + DefineBuiltinMacro(Buf, "__OBJC__=1"); + if (PP.getLangOptions().ObjC2) + DefineBuiltinMacro(Buf, "__OBJC2__=1"); + + // Add __builtin_va_list typedef. + { + const char *VAList = PP.getTargetInfo().getVAListDeclaration(); + Buf.insert(Buf.end(), VAList, VAList+strlen(VAList)); + Buf.push_back('\n'); + } + + // Get the target #defines. + PP.getTargetInfo().getTargetDefines(Buf); + + // Compiler set macros. + DefineBuiltinMacro(Buf, "__APPLE_CC__=5250"); + DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1050"); + DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0"); + DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1"); + DefineBuiltinMacro(Buf, "__GNUC__=4"); + DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002"); + DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. " + "build 5250)\""); + + // Build configuration options. 
+ DefineBuiltinMacro(Buf, "__DYNAMIC__=1"); + DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0"); + DefineBuiltinMacro(Buf, "__NO_INLINE__=1"); + DefineBuiltinMacro(Buf, "__PIC__=1"); + + + if (PP.getLangOptions().CPlusPlus) { + DefineBuiltinMacro(Buf, "__DEPRECATED=1"); + DefineBuiltinMacro(Buf, "__EXCEPTIONS=1"); + DefineBuiltinMacro(Buf, "__GNUG__=4"); + DefineBuiltinMacro(Buf, "__GXX_WEAK__=1"); + DefineBuiltinMacro(Buf, "__cplusplus=1"); + DefineBuiltinMacro(Buf, "__private_extern__=extern"); + } + if (PP.getLangOptions().Microsoft) { + DefineBuiltinMacro(Buf, "__stdcall="); + DefineBuiltinMacro(Buf, "__cdecl="); + DefineBuiltinMacro(Buf, "_cdecl="); + DefineBuiltinMacro(Buf, "__ptr64="); + DefineBuiltinMacro(Buf, "__w64="); + DefineBuiltinMacro(Buf, "__forceinline="); + DefineBuiltinMacro(Buf, "__int8=char"); + DefineBuiltinMacro(Buf, "__int16=short"); + DefineBuiltinMacro(Buf, "__int32=int"); + DefineBuiltinMacro(Buf, "__int64=long long"); + DefineBuiltinMacro(Buf, "__declspec(X)="); + } + // FIXME: Should emit a #line directive here. +} + + +/// EnterMainSourceFile - Enter the specified FileID as the main source file, +/// which implicitly adds the builtin defines etc. +void Preprocessor::EnterMainSourceFile() { + + unsigned MainFileID = SourceMgr.getMainFileID(); + + // Enter the main file source buffer. + EnterSourceFile(MainFileID, 0); + + // Tell the header info that the main file was entered. If the file is later + // #imported, it won't be re-entered. + if (const FileEntry *FE = + SourceMgr.getFileEntryForLoc(SourceLocation::getFileLoc(MainFileID, 0))) + HeaderInfo.IncrementIncludeCount(FE); + + std::vector<char> PrologFile; + PrologFile.reserve(4080); + + // Install things like __POWERPC__, __GNUC__, etc into the macro table. + InitializePredefinedMacros(*this, PrologFile); + + // Add on the predefines from the driver. + PrologFile.insert(PrologFile.end(), Predefines,Predefines+strlen(Predefines)); + + // Memory buffer must end with a null byte! 
+ PrologFile.push_back(0); + + // Now that we have emitted the predefined macros, #includes, etc into + // PrologFile, preprocess it to populate the initial preprocessor state. + llvm::MemoryBuffer *SB = + llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),&PrologFile.back(), + "<predefines>"); + assert(SB && "Cannot fail to create predefined source buffer"); + unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB); + assert(FileID && "Could not create FileID for predefines?"); + + // Start parsing the predefines. + EnterSourceFile(FileID, 0); +} + + +//===----------------------------------------------------------------------===// +// Lexer Event Handling. +//===----------------------------------------------------------------------===// + +/// LookUpIdentifierInfo - Given a tok::identifier token, look up the +/// identifier information for the token and install it into the token. +IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier, + const char *BufPtr) { + assert(Identifier.is(tok::identifier) && "Not an identifier!"); + assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); + + // Look up this token, see if it is a macro, or if it is a language keyword. + IdentifierInfo *II; + if (BufPtr && !Identifier.needsCleaning()) { + // No cleaning needed, just use the characters from the lexed buffer. + II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength()); + } else { + // Cleaning needed, alloca a buffer, clean into it, then use the buffer. + llvm::SmallVector<char, 64> IdentifierBuffer; + IdentifierBuffer.resize(Identifier.getLength()); + const char *TmpBuf = &IdentifierBuffer[0]; + unsigned Size = getSpelling(Identifier, TmpBuf); + II = getIdentifierInfo(TmpBuf, TmpBuf+Size); + } + Identifier.setIdentifierInfo(II); + return II; +} + + +/// HandleIdentifier - This callback is invoked when the lexer reads an +/// identifier. 
This callback looks up the identifier in the map and/or +/// potentially macro expands it or turns it into a named token (like 'for'). +void Preprocessor::HandleIdentifier(Token &Identifier) { + assert(Identifier.getIdentifierInfo() && + "Can't handle identifiers without identifier info!"); + + IdentifierInfo &II = *Identifier.getIdentifierInfo(); + + // If this identifier was poisoned, and if it was not produced from a macro + // expansion, emit an error. + if (II.isPoisoned() && CurLexer) { + if (&II != Ident__VA_ARGS__) // We warn about __VA_ARGS__ with poisoning. + Diag(Identifier, diag::err_pp_used_poisoned_id); + else + Diag(Identifier, diag::ext_pp_bad_vaargs_use); + } + + // If this is a macro to be expanded, do it. + if (MacroInfo *MI = getMacroInfo(&II)) { + if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { + if (MI->isEnabled()) { + if (!HandleMacroExpandedIdentifier(Identifier, MI)) + return; + } else { + // C99 6.10.3.4p2 says that a disabled macro may never again be + // expanded, even if it's in a context where it could be expanded in the + // future. + Identifier.setFlag(Token::DisableExpand); + } + } + } + + // C++ 2.11p2: If this is an alternative representation of a C++ operator, + // then we act as if it is the actual operator and not the textual + // representation of it. + if (II.isCPlusPlusOperatorKeyword()) + Identifier.setIdentifierInfo(0); + + // Change the kind of this identifier to the appropriate token kind, e.g. + // turning "for" into a keyword. + Identifier.setKind(II.getTokenID()); + + // If this is an extension token, diagnose its use. + // FIXME: tried (unsuccesfully) to shut this up when compiling with gnu99 + // For now, I'm just commenting it out (while I work on attributes). 
+ if (II.isExtensionToken() && Features.C99) + Diag(Identifier, diag::ext_token_used); +} + diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp new file mode 100644 index 00000000000..99fbdf75654 --- /dev/null +++ b/clang/lib/Lex/ScratchBuffer.cpp @@ -0,0 +1,72 @@ +//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScratchBuffer interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstring> +using namespace clang; + +// ScratchBufSize - The size of each chunk of scratch memory. Slightly less +//than a page, almost certainly enough for anything. :) +static const unsigned ScratchBufSize = 4060; + +ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) { + // Set BytesUsed so that the first call to getToken will require an alloc. + BytesUsed = ScratchBufSize; + FileID = 0; +} + +/// getToken - Splat the specified text into a temporary MemoryBuffer and +/// return a SourceLocation that refers to the token. This is just like the +/// method below, but returns a location that indicates the physloc of the +/// token. +SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { + if (BytesUsed+Len > ScratchBufSize) + AllocScratchBuffer(Len); + + // Copy the token data into the buffer. + memcpy(CurBuffer+BytesUsed, Buf, Len); + + // Remember that we used these bytes. 
+ BytesUsed += Len; + + assert(BytesUsed-Len < (1 << SourceLocation::FilePosBits) && + "Out of range file position!"); + + return SourceLocation::getFileLoc(FileID, BytesUsed-Len); +} + + +/// getToken - Splat the specified text into a temporary MemoryBuffer and +/// return a SourceLocation that refers to the token. The SourceLoc value +/// gives a virtual location that the token will appear to be from. +SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, + SourceLocation SourceLoc) { + // Map the physloc to the specified sourceloc. + return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc); +} + +void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { + // Only pay attention to the requested length if it is larger than our default + // page size. If it is, we allocate an entire chunk for it. This is to + // support gigantic tokens, which almost certainly won't happen. :) + if (RequestLen < ScratchBufSize) + RequestLen = ScratchBufSize; + + llvm::MemoryBuffer *Buf = + llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>"); + FileID = SourceMgr.createFileIDForMemBuffer(Buf); + CurBuffer = const_cast<char*>(Buf->getBufferStart()); + BytesUsed = 0; +} diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp new file mode 100644 index 00000000000..fc8cfd715c4 --- /dev/null +++ b/clang/lib/Lex/TokenLexer.cpp @@ -0,0 +1,488 @@ +//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TokenLexer interface. 
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/TokenLexer.h"
#include "MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;


/// Initialize this TokenLexer to expand the macro named by Tok with the
/// specified actual arguments.  Note that this takes ownership of the Actuals
/// pointer: it is stored in ActualArgs and released by destroy().
void TokenLexer::Init(Token &Tok, MacroArgs *Actuals) {
  // If the client is reusing a TokenLexer, make sure to free any memory
  // associated with it.
  destroy();

  Macro = PP.getMacroInfo(Tok.getIdentifierInfo());
  ActualArgs = Actuals;
  CurToken = 0;
  InstantiateLoc = Tok.getLocation();
  AtStartOfLine = Tok.isAtStartOfLine();
  HasLeadingSpace = Tok.hasLeadingSpace();
  // Lex directly out of the macro's own token list; it is not owned here.
  Tokens = &*Macro->tokens_begin();
  OwnsTokens = false;
  DisableMacroExpansion = false;
  NumTokens = Macro->tokens_end()-Macro->tokens_begin();

  // If this is a function-like macro, expand the arguments and change
  // Tokens to point to the expanded tokens.
  if (Macro->isFunctionLike() && Macro->getNumArgs())
    ExpandFunctionArguments();

  // Mark the macro as currently disabled, so that it is not recursively
  // expanded. The macro must be disabled only after argument pre-expansion of
  // function-like macro arguments occurs.
  Macro->DisableMacro();
}



/// Initialize this TokenLexer to lex from the specified token stream.  The
/// token array is owned by this lexer (and delete[]'d by destroy()) only when
/// ownsTokens is true; otherwise the caller retains ownership.
void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
                      bool disableMacroExpansion, bool ownsTokens) {
  // If the client is reusing a TokenLexer, make sure to free any memory
  // associated with it.
  destroy();

  // A token stream has no macro and no arguments associated with it.
  Macro = 0;
  ActualArgs = 0;
  Tokens = TokArray;
  OwnsTokens = ownsTokens;
  DisableMacroExpansion = disableMacroExpansion;
  NumTokens = NumToks;
  CurToken = 0;
  // An invalid InstantiateLoc makes Lex() leave token locations untouched.
  InstantiateLoc = SourceLocation();
  AtStartOfLine = false;
  HasLeadingSpace = false;

  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
  // returned unmodified.
  if (NumToks != 0) {
    AtStartOfLine = TokArray[0].isAtStartOfLine();
    HasLeadingSpace = TokArray[0].hasLeadingSpace();
  }
}


/// destroy - Release the memory owned by this lexer: the token array (only
/// when OwnsTokens is set) and the actual macro arguments (when present).
void TokenLexer::destroy() {
  // If this was a function-like macro that actually uses its arguments, delete
  // the expanded tokens.
  if (OwnsTokens) {
    delete [] Tokens;
    Tokens = 0;
  }

  // TokenLexer owns its actual arguments.
  if (ActualArgs) ActualArgs->destroy();
}

/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.  This handles the stringify (#),
/// charify (#@) and paste (##) operators as they apply to arguments; if
/// anything was substituted, Tokens/NumTokens are replaced by a newly
/// allocated array that this lexer owns.
void TokenLexer::ExpandFunctionArguments() {
  llvm::SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks. Keep
  // track of whether we change anything. If not, no need to keep them. If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  // NextTokGetsSpace - When this is true, the next token appended to the
  // output list will get a leading space, regardless of whether it had one to
  // begin with or not. This is used for placemarker support.
  bool NextTokGetsSpace = false;

  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
    // If we found the stringify operator, get the argument stringified. The
    // preprocessor already verified that the following token names a macro
    // argument when the #define was parsed.
    const Token &CurTok = Tokens[i];
    if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
      int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
      assert(ArgNo != -1 && "Token following # is not an argument?");

      Token Res;
      if (CurTok.is(tok::hash)) // Stringify
        Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
      else {
        // 'charify': don't bother caching these.
        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
                                           PP, true);
      }

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (CurTok.hasLeadingSpace() || NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++i; // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getArgumentNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument. Find out if there is a paste
    // (##) operator before or after the argument.  Note that PasteBefore
    // inspects the output list, since a preceding ## has already been copied
    // there.
    bool PasteBefore =
      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result. This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it. This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
      else
        ResultArgToks = ArgTok; // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.  (An argument's
      // token list is terminated by an eof token, so a leading eof means the
      // argument is empty.)
      if (ResultArgToks->isNot(tok::eof)) {
        unsigned FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             CurTok.hasLeadingSpace() ||
                                             NextTokGetsSpace);
        NextTokGetsSpace = false;
      } else {
        // If this is an empty argument, and if there was whitespace before the
        // formal token, make sure the next token gets whitespace before it.
        NextTokGetsSpace = CurTok.hasLeadingSpace();
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument. It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) { // Not an empty argument?
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to at least one token, avoid a pasting error
      // when the expander tries to paste ',' with the first token of the
      // __VA_ARGS__ expansion.
      if (PasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumArgs()-1 &&
          Macro->isVariadic()) {
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
        ResultToks.pop_back();
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If the next token was supposed to get leading whitespace, ensure it has
      // it now.
      if (NextTokGetsSpace) {
        ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    NextTokGetsSpace |= CurTok.hasLeadingSpace();
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace();
      ++i;
      continue;
    }

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list. Remove it.
    assert(PasteBefore && ResultToks.back().is(tok::hashhash));
    NextTokGetsSpace |= ResultToks.back().hasLeadingSpace();
    ResultToks.pop_back();

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token before
    // the ## was a comma, remove the comma.
    if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__
        ActualArgs->isVarargsElidedUse() && // Argument elided.
        !ResultToks.empty() && ResultToks.back().is(tok::comma)) {
      // Never add a space, even if the comma, ##, or arg had a space.
      NextTokGetsSpace = false;
      // Remove the comma (the ## was already removed above), report use of
      // the extension.
      PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
      ResultToks.pop_back();
    }
    continue;
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    // The new array is owned by this lexer (OwnsTokens is set below) and is
    // released by destroy().
    NumTokens = ResultToks.size();
    Token *Res = new Token[ResultToks.size()];
    if (NumTokens)
      memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token));
    Tokens = Res;
    OwnsTokens = true;
  }
}

/// Lex - Lex and return a token from this macro stream.  When the stream is
/// exhausted this pops the lexer off the preprocessor's stack and lexes from
/// whatever is next.
///
void TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    // Pop this context off the preprocessor's lexer stack and get the next
    // token. This will delete "this" so remember the PP instance var.
    Preprocessor &PPCache = PP;
    if (PP.HandleEndOfTokenLexer(Tok))
      return;

    // HandleEndOfTokenLexer may not return a token. If it doesn't, lex
    // whatever is next.
    return PPCache.Lex(Tok);
  }

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurToken == 0;

  // Get the next token to return.
  Tok = Tokens[CurToken++];

  // If this token is followed by a token paste (##) operator, paste the tokens!
  if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash))
    if (PasteTokens(Tok)) {
      // When handling the microsoft /##/ extension, the final token is
      // returned by PasteTokens, not the pasted token.
      return;
    }

  // The token's current location indicates where the token was lexed from. We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // InstantiateLoc. Pull this information together into a new SourceLocation
  // that captures all of this.
  if (InstantiateLoc.isValid()) { // Don't do this for token streams.
    SourceManager &SrcMgr = PP.getSourceManager();
    Tok.setLocation(SrcMgr.getInstantiationLoc(Tok.getLocation(),
                                               InstantiateLoc));
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  }

  // Handle recursive expansion!
  if (Tok.getIdentifierInfo() && !DisableMacroExpansion)
    return PP.HandleIdentifier(Tok);

  // Otherwise, return a normal token.
}

/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
/// is another ## after it, chomp it iteratively. Return the result as Tok.
/// If this returns true, the caller should immediately return the token.
/// On an invalid paste a diagnostic is emitted and false is returned with Tok
/// unmodified and CurToken left pointing at the RHS.
bool TokenLexer::PasteTokens(Token &Tok) {
  llvm::SmallVector<char, 128> Buffer;
  do {
    // Consume the ## operator.
    SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
    ++CurToken;
    assert(!isAtEnd() && "No token on the RHS of a paste operator!");

    // Get the RHS token.
    const Token &RHS = Tokens[CurToken];

    bool isInvalid = false;

    // Allocate space for the result token. This is guaranteed to be enough for
    // the two tokens and a null terminator.
    Buffer.resize(Tok.getLength() + RHS.getLength() + 1);

    // Get the spelling of the LHS token in Buffer.  getSpelling may redirect
    // BufPtr to other storage instead of writing into Buffer, hence the copy.
    const char *BufPtr = &Buffer[0];
    unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
    if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
      memcpy(&Buffer[0], BufPtr, LHSLen);

    BufPtr = &Buffer[LHSLen];
    unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
    if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer!
      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);

    // Add null terminator.
    Buffer[LHSLen+RHSLen] = '\0';

    // Trim excess space.
    Buffer.resize(LHSLen+RHSLen+1);

    // Plop the pasted result (including the trailing null) into a scratch
    // buffer where we can lex it.
    SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());

    // Lex the resultant pasted token into Result.
    Token Result;

    // Avoid testing /*, as the lexer would think it is the start of a comment
    // and emit an error that it is unterminated.
    if (Tok.is(tok::slash) && RHS.is(tok::star)) {
      isInvalid = true;
    } else if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
      // Common paste case: identifier+identifier = identifier. Avoid creating
      // a lexer and other overhead.
      PP.IncrementPasteCounter(true);
      Result.startToken();
      Result.setKind(tok::identifier);
      Result.setLocation(ResultTokLoc);
      Result.setLength(LHSLen+RHSLen);
    } else {
      PP.IncrementPasteCounter(false);

      // Make a lexer to lex this string from.
      SourceManager &SourceMgr = PP.getSourceManager();
      const char *ResultStrData = SourceMgr.getCharacterData(ResultTokLoc);

      // Make a lexer object so that we lex and expand the paste result.
      Lexer *TL = new Lexer(ResultTokLoc, PP, ResultStrData,
                            ResultStrData+LHSLen+RHSLen /*don't include null*/);

      // Lex a token in raw mode. This way it won't look up identifiers
      // automatically, lexing off the end will return an eof token, and
      // warnings are disabled. This returns true if the result token is the
      // entire buffer.
      bool IsComplete = TL->LexRawToken(Result);

      // If we got an EOF token, we didn't form even ONE token. For example, we
      // did "/ ## /" to get "//".
      IsComplete &= Result.isNot(tok::eof);
      isInvalid = !IsComplete;

      // We're now done with the temporary lexer.
      delete TL;
    }

    // If pasting the two tokens didn't form a full new token, this is an error.
    // This occurs with "x ## +" and other stuff. Return with Tok unmodified
    // and with RHS as the next token to lex.
    if (isInvalid) {
      // Test for the Microsoft extension of /##/ turning into // here on the
      // error path.
      if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&
          RHS.is(tok::slash)) {
        HandleMicrosoftCommentPaste(Tok);
        return true;
      } else {
        // TODO: If not in assembler language mode.
        // The diagnostic text is the pasted spelling without its trailing
        // null, hence Buffer.end()-1.
        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste,
                std::string(Buffer.begin(), Buffer.end()-1));
        return false;
      }
    }

    // Turn ## into 'unknown' to avoid # ## # from looking like a paste
    // operator.
    if (Result.is(tok::hashhash))
      Result.setKind(tok::unknown);
    // FIXME: Turn __VA_ARGS__ into "not a token"?

    // Transfer properties of the LHS over to the Result.
    Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
    Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());

    // Finally, replace LHS with the result, consume the RHS, and iterate.
    ++CurToken;
    Tok = Result;
  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));

  // Now that we got the result token, it will be subject to expansion. Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up. As such, if the result is an identifier, look up id info.
  if (Tok.is(tok::identifier)) {
    // Look up the identifier info for the token. We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
  }
  return false;
}

/// isNextTokenLParen - If the next token lexed will pop this macro off the
/// expansion stack, return 2. If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned TokenLexer::isNextTokenLParen() const {
  // Out of tokens?
  if (isAtEnd())
    return 2;
  return Tokens[CurToken].is(tok::l_paren);
}


/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
/// together to form a comment that comments out everything in the current
/// macro, other active macros, and anything left on the current physical
/// source line of the instantiated buffer. Handle this by returning the
/// first token on the next line.
void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
  // We 'comment out' the rest of this macro by just ignoring the rest of the
  // tokens that have not been lexed yet, if any.

  // Since this must be a macro, mark the macro enabled now that it is no longer
  // being expanded.
  assert(Macro && "Token streams can't paste comments");
  Macro->EnableMacro();

  // The rest of the work is delegated to the preprocessor.
  PP.HandleMicrosoftCommentPaste(Tok);
}
diff --git a/clang/lib/Makefile b/clang/lib/Makefile new file mode 100755 index 00000000000..f6514d57c77 --- /dev/null +++ b/clang/lib/Makefile @@ -0,0 +1,14 @@ +##===- lib/Makefile ----------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. + +PARALLEL_DIRS = Headers Basic Lex Parse AST Sema CodeGen Analysis Rewrite + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Parse/AttributeList.cpp b/clang/lib/Parse/AttributeList.cpp new file mode 100644 index 00000000000..0ff9447d2e6 --- /dev/null +++ b/clang/lib/Parse/AttributeList.cpp @@ -0,0 +1,98 @@ +//===--- AttributeList.cpp --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the AttributeList class implementation +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/AttributeList.h" +using namespace clang; + +AttributeList::AttributeList(IdentifierInfo *aName, SourceLocation aLoc, + IdentifierInfo *pName, SourceLocation pLoc, + Action::ExprTy **elist, unsigned numargs, + AttributeList *n) + : AttrName(aName), AttrLoc(aLoc), ParmName(pName), ParmLoc(pLoc), + NumArgs(numargs), Next(n) { + Args = new Action::ExprTy*[numargs]; + for (unsigned i = 0; i != numargs; ++i) + Args[i] = elist[i]; +} + +AttributeList::~AttributeList() { + if (Args) { + // FIXME: before we delete the vector, we need to make sure the Expr's + // have been deleted. Since Action::ExprTy is "void", we are dependent + // on the actions module for actually freeing the memory. The specific + // hooks are ActOnDeclarator, ActOnTypeName, ActOnParamDeclaratorType, + // ParseField, ParseTag. Once these routines have freed the expression, + // they should zero out the Args slot (to indicate the memory has been + // freed). If any element of the vector is non-null, we should assert. + delete [] Args; + } + delete Next; +} + +AttributeList::Kind AttributeList::getKind(const IdentifierInfo *Name) { + const char *Str = Name->getName(); + unsigned Len = Name->getLength(); + + // Normalize the attribute name, __foo__ becomes foo. 
+ if (Len > 4 && Str[0] == '_' && Str[1] == '_' && + Str[Len - 2] == '_' && Str[Len - 1] == '_') { + Str += 2; + Len -= 4; + } + + switch (Len) { + case 4: + if (!memcmp(Str, "weak", 4)) return AT_weak; + if (!memcmp(Str, "pure", 4)) return AT_pure; + break; + case 6: + if (!memcmp(Str, "packed", 6)) return AT_packed; + if (!memcmp(Str, "malloc", 6)) return AT_malloc; + if (!memcmp(Str, "format", 6)) return AT_format; + if (!memcmp(Str, "unused", 6)) return AT_unused; + break; + case 7: + if (!memcmp(Str, "aligned", 7)) return AT_aligned; + if (!memcmp(Str, "nothrow", 7)) return AT_nothrow; + if (!memcmp(Str, "nonnull", 7)) return AT_nonnull; + if (!memcmp(Str, "stdcall", 7)) return AT_stdcall; + break; + case 8: + if (!memcmp(Str, "annotate", 8)) return AT_annotate; + if (!memcmp(Str, "noreturn", 8)) return AT_noreturn; + if (!memcmp(Str, "noinline", 8)) return AT_noinline; + if (!memcmp(Str, "fastcall", 8)) return AT_fastcall; + break; + case 9: + if (!memcmp(Str, "dllimport", 9)) return AT_dllimport; + if (!memcmp(Str, "dllexport", 9)) return AT_dllexport; + break; + case 10: + if (!memcmp(Str, "deprecated", 10)) return AT_deprecated; + if (!memcmp(Str, "visibility", 10)) return AT_visibility; + break; + case 11: + if (!memcmp(Str, "vector_size", 11)) return AT_vector_size; + break; + case 13: + if (!memcmp(Str, "address_space", 13)) return AT_address_space; + break; + case 15: + if (!memcmp(Str, "ocu_vector_type", 15)) return AT_ocu_vector_type; + break; + case 18: + if (!memcmp(Str, "warn_unused_result", 18)) return AT_warn_unused_result; + break; + } + return UnknownAttribute; +} diff --git a/clang/lib/Parse/DeclSpec.cpp b/clang/lib/Parse/DeclSpec.cpp new file mode 100644 index 00000000000..1cd350893f4 --- /dev/null +++ b/clang/lib/Parse/DeclSpec.cpp @@ -0,0 +1,287 @@ +//===--- SemaDeclSpec.cpp - Declaration Specifier Semantic Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for declaration specifiers. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/DeclSpec.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +using namespace clang; + +/// getParsedSpecifiers - Return a bitmask of which flavors of specifiers this +/// +unsigned DeclSpec::getParsedSpecifiers() const { + unsigned Res = 0; + if (StorageClassSpec != SCS_unspecified || + SCS_thread_specified) + Res |= PQ_StorageClassSpecifier; + + if (TypeQualifiers != TQ_unspecified) + Res |= PQ_TypeQualifier; + + if (hasTypeSpecifier()) + Res |= PQ_TypeSpecifier; + + if (FS_inline_specified) + Res |= PQ_FunctionSpecifier; + return Res; +} + +const char *DeclSpec::getSpecifierName(DeclSpec::SCS S) { + switch (S) { + default: assert(0 && "Unknown typespec!"); + case DeclSpec::SCS_unspecified: return "unspecified"; + case DeclSpec::SCS_typedef: return "typedef"; + case DeclSpec::SCS_extern: return "extern"; + case DeclSpec::SCS_static: return "static"; + case DeclSpec::SCS_auto: return "auto"; + case DeclSpec::SCS_register: return "register"; + } +} + +bool DeclSpec::BadSpecifier(SCS S, const char *&PrevSpec) { + PrevSpec = getSpecifierName(S); + return true; +} + +bool DeclSpec::BadSpecifier(TSW W, const char *&PrevSpec) { + switch (W) { + case TSW_unspecified: PrevSpec = "unspecified"; break; + case TSW_short: PrevSpec = "short"; break; + case TSW_long: PrevSpec = "long"; break; + case TSW_longlong: PrevSpec = "long long"; break; + } + return true; +} + +bool DeclSpec::BadSpecifier(TSC C, const char *&PrevSpec) { + switch (C) { + case TSC_unspecified: PrevSpec = "unspecified"; break; + case TSC_imaginary: PrevSpec = "imaginary"; break; + case TSC_complex: PrevSpec = "complex"; break; + } + return true; +} + + +bool 
DeclSpec::BadSpecifier(TSS S, const char *&PrevSpec) { + switch (S) { + case TSS_unspecified: PrevSpec = "unspecified"; break; + case TSS_signed: PrevSpec = "signed"; break; + case TSS_unsigned: PrevSpec = "unsigned"; break; + } + return true; +} + +const char *DeclSpec::getSpecifierName(DeclSpec::TST T) { + switch (T) { + default: assert(0 && "Unknown typespec!"); + case DeclSpec::TST_unspecified: return "unspecified"; + case DeclSpec::TST_void: return "void"; + case DeclSpec::TST_char: return "char"; + case DeclSpec::TST_int: return "int"; + case DeclSpec::TST_float: return "float"; + case DeclSpec::TST_double: return "double"; + case DeclSpec::TST_bool: return "_Bool"; + case DeclSpec::TST_decimal32: return "_Decimal32"; + case DeclSpec::TST_decimal64: return "_Decimal64"; + case DeclSpec::TST_decimal128: return "_Decimal128"; + case DeclSpec::TST_enum: return "enum"; + case DeclSpec::TST_union: return "union"; + case DeclSpec::TST_struct: return "struct"; + case DeclSpec::TST_typedef: return "typedef"; + case DeclSpec::TST_typeofType: + case DeclSpec::TST_typeofExpr: return "typeof"; + } +} + +bool DeclSpec::BadSpecifier(TST T, const char *&PrevSpec) { + PrevSpec = getSpecifierName(T); + return true; +} + +bool DeclSpec::BadSpecifier(TQ T, const char *&PrevSpec) { + switch (T) { + case DeclSpec::TQ_unspecified: PrevSpec = "unspecified"; break; + case DeclSpec::TQ_const: PrevSpec = "const"; break; + case DeclSpec::TQ_restrict: PrevSpec = "restrict"; break; + case DeclSpec::TQ_volatile: PrevSpec = "volatile"; break; + } + return true; +} + +bool DeclSpec::SetStorageClassSpec(SCS S, SourceLocation Loc, + const char *&PrevSpec) { + if (StorageClassSpec != SCS_unspecified) + return BadSpecifier( (SCS)StorageClassSpec, PrevSpec); + StorageClassSpec = S; + StorageClassSpecLoc = Loc; + return false; +} + +bool DeclSpec::SetStorageClassSpecThread(SourceLocation Loc, + const char *&PrevSpec) { + if (SCS_thread_specified) { + PrevSpec = "__thread"; + return true; + } + 
SCS_thread_specified = true; + SCS_threadLoc = Loc; + return false; +} + + +/// These methods set the specified attribute of the DeclSpec, but return true +/// and ignore the request if invalid (e.g. "extern" then "auto" is +/// specified). +bool DeclSpec::SetTypeSpecWidth(TSW W, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecWidth != TSW_unspecified && + // Allow turning long -> long long. + (W != TSW_longlong || TypeSpecWidth != TSW_long)) + return BadSpecifier( (TSW)TypeSpecWidth, PrevSpec); + TypeSpecWidth = W; + TSWLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecComplex(TSC C, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecComplex != TSC_unspecified) + return BadSpecifier( (TSC)TypeSpecComplex, PrevSpec); + TypeSpecComplex = C; + TSCLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecSign(TSS S, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecSign != TSS_unspecified) + return BadSpecifier( (TSS)TypeSpecSign, PrevSpec); + TypeSpecSign = S; + TSSLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecType(TST T, SourceLocation Loc, + const char *&PrevSpec, void *Rep) { + if (TypeSpecType != TST_unspecified) + return BadSpecifier( (TST)TypeSpecType, PrevSpec); + TypeSpecType = T; + TypeRep = Rep; + TSTLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, + const LangOptions &Lang) { + // Duplicates turn into warnings pre-C99. + if ((TypeQualifiers & T) && !Lang.C99) + return BadSpecifier(T, PrevSpec); + TypeQualifiers |= T; + + switch (T) { + default: assert(0 && "Unknown type qualifier!"); + case TQ_const: TQ_constLoc = Loc; break; + case TQ_restrict: TQ_restrictLoc = Loc; break; + case TQ_volatile: TQ_volatileLoc = Loc; break; + } + return false; +} + +bool DeclSpec::SetFunctionSpecInline(SourceLocation Loc, const char *&PrevSpec){ + // 'inline inline' is ok. 
+ FS_inline_specified = true; + FS_inlineLoc = Loc; + return false; +} + + +/// Finish - This does final analysis of the declspec, rejecting things like +/// "_Imaginary" (lacking an FP type). Any problems found are reported +/// directly through the Diagnostic object D. After calling this method, +/// DeclSpec is guaranteed self-consistent, even if an error occurred. +void DeclSpec::Finish(Diagnostic &D, SourceManager& SrcMgr, + const LangOptions &Lang) { + // Check the type specifier components first. + + // signed/unsigned are only valid with int/char. + if (TypeSpecSign != TSS_unspecified) { + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // unsigned -> unsigned int, signed -> signed int. + else if (TypeSpecType != TST_int && TypeSpecType != TST_char) { + Diag(D, TSSLoc, SrcMgr, diag::err_invalid_sign_spec, + getSpecifierName( (TST)TypeSpecType)); + // signed double -> double. + TypeSpecSign = TSS_unspecified; + } + } + + // Validate the width of the type. + switch (TypeSpecWidth) { + case TSW_unspecified: break; + case TSW_short: // short int + case TSW_longlong: // long long int + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // short -> short int, long long -> long long int. + else if (TypeSpecType != TST_int) { + Diag(D, TSWLoc, SrcMgr, + TypeSpecWidth == TSW_short ? diag::err_invalid_short_spec + : diag::err_invalid_longlong_spec, + getSpecifierName( (TST)TypeSpecType)); + TypeSpecType = TST_int; + } + break; + case TSW_long: // long double, long int + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // long -> long int. + else if (TypeSpecType != TST_int && TypeSpecType != TST_double) { + Diag(D, TSWLoc, SrcMgr, diag::err_invalid_long_spec, + getSpecifierName( (TST)TypeSpecType)); + TypeSpecType = TST_int; + } + break; + } + + // TODO: if the implementation does not implement _Complex or _Imaginary, + // disallow their use. Need information about the backend. 
+ if (TypeSpecComplex != TSC_unspecified) { + if (TypeSpecType == TST_unspecified) { + Diag(D, TSCLoc, SrcMgr, diag::ext_plain_complex); + TypeSpecType = TST_double; // _Complex -> _Complex double. + } else if (TypeSpecType == TST_int || TypeSpecType == TST_char) { + // Note that this intentionally doesn't include _Complex _Bool. + Diag(D, TSTLoc, SrcMgr, diag::ext_integer_complex); + } else if (TypeSpecType != TST_float && TypeSpecType != TST_double) { + Diag(D, TSCLoc, SrcMgr, diag::err_invalid_complex_spec, + getSpecifierName( (TST)TypeSpecType)); + TypeSpecComplex = TSC_unspecified; + } + } + + // Verify __thread. + if (SCS_thread_specified) { + if (StorageClassSpec == SCS_unspecified) { + StorageClassSpec = SCS_extern; // '__thread int' -> 'extern __thread int' + } else if (StorageClassSpec != SCS_extern && + StorageClassSpec != SCS_static) { + Diag(D, getStorageClassSpecLoc(), SrcMgr, diag::err_invalid_thread_spec, + getSpecifierName( (SCS)StorageClassSpec)); + SCS_thread_specified = false; + } + } + + // Okay, now we can infer the real type. + + // TODO: return "auto function" and other bad things based on the real type. + + // 'data definition has no type or storage class'? +} diff --git a/clang/lib/Parse/Makefile b/clang/lib/Parse/Makefile new file mode 100644 index 00000000000..b5d2653bb09 --- /dev/null +++ b/clang/lib/Parse/Makefile @@ -0,0 +1,22 @@ +##===- clang/lib/Parse/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Parser library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME := clangParse +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include + +include $(LEVEL)/Makefile.common + diff --git a/clang/lib/Parse/MinimalAction.cpp b/clang/lib/Parse/MinimalAction.cpp new file mode 100644 index 00000000000..250fa76ccc4 --- /dev/null +++ b/clang/lib/Parse/MinimalAction.cpp @@ -0,0 +1,136 @@ +//===--- MinimalAction.cpp - Implement the MinimalAction class ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MinimalAction interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +/// TypeNameInfo - A link exists here for each scope that an identifier is +/// defined. +struct TypeNameInfo { + TypeNameInfo *Prev; + bool isTypeName; + + TypeNameInfo(bool istypename, TypeNameInfo *prev) { + isTypeName = istypename; + Prev = prev; + } +}; + +void MinimalAction:: ActOnTranslationUnitScope(SourceLocation Loc, Scope *S) { + TUScope = S; + IdentifierInfo *II; + TypeNameInfo *TI; + + // recognize the ObjC built-in type identifiers. 
+ II = &Idents.get("id"); + TI = new TypeNameInfo(1, II->getFETokenInfo<TypeNameInfo>()); + II->setFETokenInfo(TI); + II = &Idents.get("SEL"); + TI = new TypeNameInfo(1, II->getFETokenInfo<TypeNameInfo>()); + II->setFETokenInfo(TI); + II = &Idents.get("Class"); + TI = new TypeNameInfo(1, II->getFETokenInfo<TypeNameInfo>()); + II->setFETokenInfo(TI); + II = &Idents.get("Protocol"); + TI = new TypeNameInfo(1, II->getFETokenInfo<TypeNameInfo>()); + II->setFETokenInfo(TI); +} + +/// isTypeName - This looks at the IdentifierInfo::FETokenInfo field to +/// determine whether the name is a type name (objc class name or typedef) or +/// not in this scope. +Action::DeclTy * +MinimalAction::isTypeName(const IdentifierInfo &II, Scope *S) const { + if (TypeNameInfo *TI = II.getFETokenInfo<TypeNameInfo>()) + if (TI->isTypeName) + return TI; + return 0; +} + +/// ActOnDeclarator - If this is a typedef declarator, we modify the +/// IdentifierInfo::FETokenInfo field to keep track of this fact, until S is +/// popped. +Action::DeclTy * +MinimalAction::ActOnDeclarator(Scope *S, Declarator &D, DeclTy *LastInGroup) { + IdentifierInfo *II = D.getIdentifier(); + + // If there is no identifier associated with this declarator, bail out. + if (II == 0) return 0; + + TypeNameInfo *weCurrentlyHaveTypeInfo = II->getFETokenInfo<TypeNameInfo>(); + bool isTypeName = + D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef; + + // this check avoids creating TypeNameInfo objects for the common case. + // It does need to handle the uncommon case of shadowing a typedef name with a + // non-typedef name. e.g. { typedef int a; a xx; { int a; } } + if (weCurrentlyHaveTypeInfo || isTypeName) { + TypeNameInfo *TI = new TypeNameInfo(isTypeName, weCurrentlyHaveTypeInfo); + + II->setFETokenInfo(TI); + + // Remember that this needs to be removed when the scope is popped. 
+ S->AddDecl(II); + } + return 0; +} + +Action::DeclTy * +MinimalAction::ActOnStartClassInterface(SourceLocation AtInterafceLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *SuperName, SourceLocation SuperLoc, + IdentifierInfo **ProtocolNames, unsigned NumProtocols, + SourceLocation EndProtoLoc, AttributeList *AttrList) { + TypeNameInfo *TI = + new TypeNameInfo(1, ClassName->getFETokenInfo<TypeNameInfo>()); + + ClassName->setFETokenInfo(TI); + return 0; +} + +/// ActOnForwardClassDeclaration - +/// Scope will always be top level file scope. +Action::DeclTy * +MinimalAction::ActOnForwardClassDeclaration(SourceLocation AtClassLoc, + IdentifierInfo **IdentList, unsigned NumElts) { + for (unsigned i = 0; i != NumElts; ++i) { + TypeNameInfo *TI = + new TypeNameInfo(1, IdentList[i]->getFETokenInfo<TypeNameInfo>()); + + IdentList[i]->setFETokenInfo(TI); + + // Remember that this needs to be removed when the scope is popped. + TUScope->AddDecl(IdentList[i]); + } + return 0; +} + +/// ActOnPopScope - When a scope is popped, if any typedefs are now out-of-scope, +/// they are removed from the IdentifierInfo::FETokenInfo field. +void MinimalAction::ActOnPopScope(SourceLocation Loc, Scope *S) { + for (Scope::decl_iterator I = S->decl_begin(), E = S->decl_end(); + I != E; ++I) { + IdentifierInfo &II = *static_cast<IdentifierInfo*>(*I); + TypeNameInfo *TI = II.getFETokenInfo<TypeNameInfo>(); + assert(TI && "This decl didn't get pushed??"); + + if (TI) { + TypeNameInfo *Next = TI->Prev; + delete TI; + + II.setFETokenInfo(Next); + } + } +} diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp new file mode 100644 index 00000000000..7d15e984ee6 --- /dev/null +++ b/clang/lib/Parse/ParseDecl.cpp @@ -0,0 +1,1540 @@ +//===--- ParseDecl.cpp - Declaration Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Declaration portions of the Parser interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +#include "llvm/ADT/SmallSet.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// C99 6.7: Declarations. +//===----------------------------------------------------------------------===// + +/// ParseTypeName +/// type-name: [C99 6.7.6] +/// specifier-qualifier-list abstract-declarator[opt] +Parser::TypeTy *Parser::ParseTypeName() { + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseSpecifierQualifierList(DS); + + // Parse the abstract-declarator, if present. + Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); + ParseDeclarator(DeclaratorInfo); + + return Actions.ActOnTypeName(CurScope, DeclaratorInfo).Val; +} + +/// ParseAttributes - Parse a non-empty attributes list. +/// +/// [GNU] attributes: +/// attribute +/// attributes attribute +/// +/// [GNU] attribute: +/// '__attribute__' '(' '(' attribute-list ')' ')' +/// +/// [GNU] attribute-list: +/// attrib +/// attribute_list ',' attrib +/// +/// [GNU] attrib: +/// empty +/// attrib-name +/// attrib-name '(' identifier ')' +/// attrib-name '(' identifier ',' nonempty-expr-list ')' +/// attrib-name '(' argument-expression-list [C99 6.5.2] ')' +/// +/// [GNU] attrib-name: +/// identifier +/// typespec +/// typequal +/// storageclass +/// +/// FIXME: The GCC grammar/code for this construct implies we need two +/// token lookahead. Comment from gcc: "If they start with an identifier +/// which is followed by a comma or close parenthesis, then the arguments +/// start with that identifier; otherwise they are an expression list." 
+/// +/// At the moment, I am not doing 2 token lookahead. I am also unaware of +/// any attributes that don't work (based on my limited testing). Most +/// attributes are very simple in practice. Until we find a bug, I don't see +/// a pressing need to implement the 2 token lookahead. + +AttributeList *Parser::ParseAttributes() { + assert(Tok.is(tok::kw___attribute) && "Not an attribute list!"); + + AttributeList *CurrAttr = 0; + + while (Tok.is(tok::kw___attribute)) { + ConsumeToken(); + if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, + "attribute")) { + SkipUntil(tok::r_paren, true); // skip until ) or ; + return CurrAttr; + } + if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, "(")) { + SkipUntil(tok::r_paren, true); // skip until ) or ; + return CurrAttr; + } + // Parse the attribute-list. e.g. __attribute__(( weak, alias("__f") )) + while (Tok.is(tok::identifier) || isDeclarationSpecifier() || + Tok.is(tok::comma)) { + + if (Tok.is(tok::comma)) { + // allows for empty/non-empty attributes. ((__vector_size__(16),,,,)) + ConsumeToken(); + continue; + } + // we have an identifier or declaration specifier (const, int, etc.) 
+ IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + + // check if we have a "parameterized" attribute + if (Tok.is(tok::l_paren)) { + ConsumeParen(); // ignore the left paren loc for now + + if (Tok.is(tok::identifier)) { + IdentifierInfo *ParmName = Tok.getIdentifierInfo(); + SourceLocation ParmLoc = ConsumeToken(); + + if (Tok.is(tok::r_paren)) { + // __attribute__(( mode(byte) )) + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + ParmName, ParmLoc, 0, 0, CurrAttr); + } else if (Tok.is(tok::comma)) { + ConsumeToken(); + // __attribute__(( format(printf, 1, 2) )) + llvm::SmallVector<ExprTy*, 8> ArgExprs; + bool ArgExprsOk = true; + + // now parse the non-empty comma separated list of expressions + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + ArgExprsOk = false; + SkipUntil(tok::r_paren); + break; + } else { + ArgExprs.push_back(ArgExpr.Val); + } + if (Tok.isNot(tok::comma)) + break; + ConsumeToken(); // Eat the comma, move to the next argument + } + if (ArgExprsOk && Tok.is(tok::r_paren)) { + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, ParmName, + ParmLoc, &ArgExprs[0], ArgExprs.size(), CurrAttr); + } + } + } else { // not an identifier + // parse a possibly empty comma separated list of expressions + if (Tok.is(tok::r_paren)) { + // __attribute__(( nonnull() )) + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + 0, SourceLocation(), 0, 0, CurrAttr); + } else { + // __attribute__(( aligned(16) )) + llvm::SmallVector<ExprTy*, 8> ArgExprs; + bool ArgExprsOk = true; + + // now parse the list of expressions + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + ArgExprsOk = false; + SkipUntil(tok::r_paren); + break; + } else { + 
ArgExprs.push_back(ArgExpr.Val); + } + if (Tok.isNot(tok::comma)) + break; + ConsumeToken(); // Eat the comma, move to the next argument + } + // Match the ')'. + if (ArgExprsOk && Tok.is(tok::r_paren)) { + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, 0, + SourceLocation(), &ArgExprs[0], ArgExprs.size(), + CurrAttr); + } + } + } + } else { + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + 0, SourceLocation(), 0, 0, CurrAttr); + } + } + if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) + SkipUntil(tok::r_paren, false); + if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) + SkipUntil(tok::r_paren, false); + } + return CurrAttr; +} + +/// ParseDeclaration - Parse a full 'declaration', which consists of +/// declaration-specifiers, some number of declarators, and a semicolon. +/// 'Context' should be a Declarator::TheContext value. +/// +/// declaration: [C99 6.7] +/// block-declaration -> +/// simple-declaration +/// others [FIXME] +/// [C++] namespace-definition +/// others... [FIXME] +/// +Parser::DeclTy *Parser::ParseDeclaration(unsigned Context) { + switch (Tok.getKind()) { + case tok::kw_namespace: + return ParseNamespace(Context); + default: + return ParseSimpleDeclaration(Context); + } +} + +/// simple-declaration: [C99 6.7: declaration] [C++ 7p1: dcl.dcl] +/// declaration-specifiers init-declarator-list[opt] ';' +///[C90/C++]init-declarator-list ';' [TODO] +/// [OMP] threadprivate-directive [TODO] +Parser::DeclTy *Parser::ParseSimpleDeclaration(unsigned Context) { + // Parse the common declaration-specifiers piece. 
+ DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" + // declaration-specifiers init-declarator-list[opt] ';' + if (Tok.is(tok::semi)) { + ConsumeToken(); + return Actions.ParsedFreeStandingDeclSpec(CurScope, DS); + } + + Declarator DeclaratorInfo(DS, (Declarator::TheContext)Context); + ParseDeclarator(DeclaratorInfo); + + return ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo); +} + + +/// ParseInitDeclaratorListAfterFirstDeclarator - Parse 'declaration' after +/// parsing 'declaration-specifiers declarator'. This method is split out this +/// way to handle the ambiguity between top-level function-definitions and +/// declarations. +/// +/// init-declarator-list: [C99 6.7] +/// init-declarator +/// init-declarator-list ',' init-declarator +/// init-declarator: [C99 6.7] +/// declarator +/// declarator '=' initializer +/// [GNU] declarator simple-asm-expr[opt] attributes[opt] +/// [GNU] declarator simple-asm-expr[opt] attributes[opt] '=' initializer +/// +Parser::DeclTy *Parser:: +ParseInitDeclaratorListAfterFirstDeclarator(Declarator &D) { + + // Declarators may be grouped together ("int X, *Y, Z();"). Provide info so + // that they can be chained properly if the actions want this. + Parser::DeclTy *LastDeclInGroup = 0; + + // At this point, we know that it is not a function definition. Parse the + // rest of the init-declarator-list. + while (1) { + // If a simple-asm-expr is present, parse it. + if (Tok.is(tok::kw_asm)) + ParseSimpleAsm(); + + // If attributes are present, parse them. + if (Tok.is(tok::kw___attribute)) + D.AddAttributes(ParseAttributes()); + + // Inform the current actions module that we just parsed this declarator. + // FIXME: pass asm & attributes. + LastDeclInGroup = Actions.ActOnDeclarator(CurScope, D, LastDeclInGroup); + + // Parse declarator '=' initializer. 
+ ExprResult Init; + if (Tok.is(tok::equal)) { + ConsumeToken(); + Init = ParseInitializer(); + if (Init.isInvalid) { + SkipUntil(tok::semi); + return 0; + } + Actions.AddInitializerToDecl(LastDeclInGroup, Init.Val); + } + + // If we don't have a comma, it is either the end of the list (a ';') or an + // error, bail out. + if (Tok.isNot(tok::comma)) + break; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. + D.clear(); + ParseDeclarator(D); + } + + if (Tok.is(tok::semi)) { + ConsumeToken(); + return Actions.FinalizeDeclaratorGroup(CurScope, LastDeclInGroup); + } + // If this is an ObjC2 for-each loop, this is a successful declarator + // parse. The syntax for these looks like: + // 'for' '(' declaration 'in' expr ')' statement + if (D.getContext() == Declarator::ForContext && isTokIdentifier_in()) { + return Actions.FinalizeDeclaratorGroup(CurScope, LastDeclInGroup); + } + Diag(Tok, diag::err_parse_error); + // Skip to end of block or statement + SkipUntil(tok::r_brace, true, true); + if (Tok.is(tok::semi)) + ConsumeToken(); + return 0; +} + +/// ParseSpecifierQualifierList +/// specifier-qualifier-list: +/// type-specifier specifier-qualifier-list[opt] +/// type-qualifier specifier-qualifier-list[opt] +/// [GNU] attributes specifier-qualifier-list[opt] +/// +void Parser::ParseSpecifierQualifierList(DeclSpec &DS) { + /// specifier-qualifier-list is a subset of declaration-specifiers. Just + /// parse declaration-specifiers and complain about extra stuff. + ParseDeclarationSpecifiers(DS); + + // Validate declspec for type-name. + unsigned Specs = DS.getParsedSpecifiers(); + if (Specs == DeclSpec::PQ_None) + Diag(Tok, diag::err_typename_requires_specqual); + + // Issue diagnostic and remove storage class if present. 
+ if (Specs & DeclSpec::PQ_StorageClassSpecifier) { + if (DS.getStorageClassSpecLoc().isValid()) + Diag(DS.getStorageClassSpecLoc(),diag::err_typename_invalid_storageclass); + else + Diag(DS.getThreadSpecLoc(), diag::err_typename_invalid_storageclass); + DS.ClearStorageClassSpecs(); + } + + // Issue diagnostic and remove function specifier if present. + if (Specs & DeclSpec::PQ_FunctionSpecifier) { + Diag(DS.getInlineSpecLoc(), diag::err_typename_invalid_functionspec); + DS.ClearFunctionSpecs(); + } +} + +/// ParseDeclarationSpecifiers +/// declaration-specifiers: [C99 6.7] +/// storage-class-specifier declaration-specifiers[opt] +/// type-specifier declaration-specifiers[opt] +/// type-qualifier declaration-specifiers[opt] +/// [C99] function-specifier declaration-specifiers[opt] +/// [GNU] attributes declaration-specifiers[opt] +/// +/// storage-class-specifier: [C99 6.7.1] +/// 'typedef' +/// 'extern' +/// 'static' +/// 'auto' +/// 'register' +/// [GNU] '__thread' +/// type-specifier: [C99 6.7.2] +/// 'void' +/// 'char' +/// 'short' +/// 'int' +/// 'long' +/// 'float' +/// 'double' +/// 'signed' +/// 'unsigned' +/// struct-or-union-specifier +/// enum-specifier +/// typedef-name +/// [C++] 'bool' +/// [C99] '_Bool' +/// [C99] '_Complex' +/// [C99] '_Imaginary' // Removed in TC2? 
+/// [GNU] '_Decimal32' +/// [GNU] '_Decimal64' +/// [GNU] '_Decimal128' +/// [GNU] typeof-specifier +/// [OBJC] class-name objc-protocol-refs[opt] [TODO] +/// [OBJC] typedef-name objc-protocol-refs[opt] [TODO] +/// type-qualifier: +/// 'const' +/// 'volatile' +/// [C99] 'restrict' +/// function-specifier: [C99 6.7.4] +/// [C99] 'inline' +/// +void Parser::ParseDeclarationSpecifiers(DeclSpec &DS) { + DS.SetRangeStart(Tok.getLocation()); + while (1) { + int isInvalid = false; + const char *PrevSpec = 0; + SourceLocation Loc = Tok.getLocation(); + + switch (Tok.getKind()) { + // typedef-name + case tok::identifier: + // This identifier can only be a typedef name if we haven't already seen + // a type-specifier. Without this check we misparse: + // typedef int X; struct Y { short X; }; as 'short int'. + if (!DS.hasTypeSpecifier()) { + // It has to be available as a typedef too! + if (void *TypeRep = Actions.isTypeName(*Tok.getIdentifierInfo(), + CurScope)) { + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typedef, Loc, PrevSpec, + TypeRep); + if (isInvalid) + break; + // FIXME: restrict this to "id" and ObjC classnames. + DS.SetRangeEnd(Tok.getLocation()); + ConsumeToken(); // The identifier + if (Tok.is(tok::less)) { + SourceLocation endProtoLoc; + llvm::SmallVector<IdentifierInfo *, 8> ProtocolRefs; + ParseObjCProtocolReferences(ProtocolRefs, endProtoLoc); + llvm::SmallVector<DeclTy *, 8> *ProtocolDecl = + new llvm::SmallVector<DeclTy *, 8>; + DS.setProtocolQualifiers(ProtocolDecl); + Actions.FindProtocolDeclaration(Loc, + &ProtocolRefs[0], ProtocolRefs.size(), + *ProtocolDecl); + } + continue; + } + } + // FALL THROUGH. + default: + // If this is not a declaration specifier token, we're done reading decl + // specifiers. First verify that DeclSpec's are consistent. + DS.Finish(Diags, PP.getSourceManager(), getLang()); + return; + + // GNU attributes support. 
+ case tok::kw___attribute: + DS.AddAttributes(ParseAttributes()); + continue; + + // storage-class-specifier + case tok::kw_typedef: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_typedef, Loc, PrevSpec); + break; + case tok::kw_extern: + if (DS.isThreadSpecified()) + Diag(Tok, diag::ext_thread_before, "extern"); + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_extern, Loc, PrevSpec); + break; + case tok::kw___private_extern__: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_private_extern, Loc, PrevSpec); + break; + case tok::kw_static: + if (DS.isThreadSpecified()) + Diag(Tok, diag::ext_thread_before, "static"); + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_static, Loc, PrevSpec); + break; + case tok::kw_auto: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_auto, Loc, PrevSpec); + break; + case tok::kw_register: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_register, Loc, PrevSpec); + break; + case tok::kw___thread: + isInvalid = DS.SetStorageClassSpecThread(Loc, PrevSpec)*2; + break; + + // type-specifiers + case tok::kw_short: + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec); + break; + case tok::kw_long: + if (DS.getTypeSpecWidth() != DeclSpec::TSW_long) + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_long, Loc, PrevSpec); + else + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_longlong, Loc, PrevSpec); + break; + case tok::kw_signed: + isInvalid = DS.SetTypeSpecSign(DeclSpec::TSS_signed, Loc, PrevSpec); + break; + case tok::kw_unsigned: + isInvalid = DS.SetTypeSpecSign(DeclSpec::TSS_unsigned, Loc, PrevSpec); + break; + case tok::kw__Complex: + isInvalid = DS.SetTypeSpecComplex(DeclSpec::TSC_complex, Loc, PrevSpec); + break; + case tok::kw__Imaginary: + isInvalid = DS.SetTypeSpecComplex(DeclSpec::TSC_imaginary, Loc, PrevSpec); + break; + case tok::kw_void: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_void, Loc, PrevSpec); + break; + case tok::kw_char: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char, Loc, 
PrevSpec); + break; + case tok::kw_int: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, PrevSpec); + break; + case tok::kw_float: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_float, Loc, PrevSpec); + break; + case tok::kw_double: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_double, Loc, PrevSpec); + break; + case tok::kw_bool: // [C++ 2.11p1] + case tok::kw__Bool: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_bool, Loc, PrevSpec); + break; + case tok::kw__Decimal32: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal32, Loc, PrevSpec); + break; + case tok::kw__Decimal64: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal64, Loc, PrevSpec); + break; + case tok::kw__Decimal128: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal128, Loc, PrevSpec); + break; + + case tok::kw_struct: + case tok::kw_union: + ParseStructUnionSpecifier(DS); + continue; + case tok::kw_enum: + ParseEnumSpecifier(DS); + continue; + + // GNU typeof support. + case tok::kw_typeof: + ParseTypeofSpecifier(DS); + continue; + + // type-qualifier + case tok::kw_const: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_const , Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_volatile: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_restrict: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, + getLang())*2; + break; + + // function-specifier + case tok::kw_inline: + isInvalid = DS.SetFunctionSpecInline(Loc, PrevSpec); + break; + } + // If the specifier combination wasn't legal, issue a diagnostic. + if (isInvalid) { + assert(PrevSpec && "Method did not return previous specifier!"); + if (isInvalid == 1) // Error. + Diag(Tok, diag::err_invalid_decl_spec_combination, PrevSpec); + else // extwarn. 
+ Diag(Tok, diag::ext_duplicate_declspec, PrevSpec); + } + DS.SetRangeEnd(Tok.getLocation()); + ConsumeToken(); + } +} + +/// ParseTag - Parse "struct-or-union-or-class-or-enum identifier[opt]", where +/// the first token has already been read and has been turned into an instance +/// of DeclSpec::TST (TagType). This returns true if there is an error parsing, +/// otherwise it returns false and fills in Decl. +bool Parser::ParseTag(DeclTy *&Decl, unsigned TagType, SourceLocation StartLoc){ + AttributeList *Attr = 0; + // If attributes exist after tag, parse them. + if (Tok.is(tok::kw___attribute)) + Attr = ParseAttributes(); + + // Must have either 'struct name' or 'struct {...}'. + if (Tok.isNot(tok::identifier) && Tok.isNot(tok::l_brace)) { + Diag(Tok, diag::err_expected_ident_lbrace); + + // Skip the rest of this declarator, up until the comma or semicolon. + SkipUntil(tok::comma, true); + return true; + } + + // If an identifier is present, consume and remember it. + IdentifierInfo *Name = 0; + SourceLocation NameLoc; + if (Tok.is(tok::identifier)) { + Name = Tok.getIdentifierInfo(); + NameLoc = ConsumeToken(); + } + + // There are three options here. If we have 'struct foo;', then this is a + // forward declaration. If we have 'struct foo {...' then this is a + // definition. Otherwise we have something like 'struct foo xyz', a reference. + // + // This is needed to handle stuff like this right (C99 6.7.2.3p11): + // struct foo {..}; void bar() { struct foo; } <- new foo in bar. + // struct foo {..}; void bar() { struct foo x; } <- use of old foo. 
+ // + Action::TagKind TK; + if (Tok.is(tok::l_brace)) + TK = Action::TK_Definition; + else if (Tok.is(tok::semi)) + TK = Action::TK_Declaration; + else + TK = Action::TK_Reference; + Decl = Actions.ActOnTag(CurScope, TagType, TK, StartLoc, Name, NameLoc, Attr); + return false; +} + + +/// ParseStructUnionSpecifier +/// struct-or-union-specifier: [C99 6.7.2.1] +/// struct-or-union identifier[opt] '{' struct-contents '}' +/// struct-or-union identifier +/// [GNU] struct-or-union attributes[opt] identifier[opt] '{' struct-contents +/// '}' attributes[opt] +/// [GNU] struct-or-union attributes[opt] identifier +/// struct-or-union: +/// 'struct' +/// 'union' +/// +void Parser::ParseStructUnionSpecifier(DeclSpec &DS) { + assert((Tok.is(tok::kw_struct) || Tok.is(tok::kw_union)) && + "Not a struct/union specifier"); + DeclSpec::TST TagType = + Tok.is(tok::kw_union) ? DeclSpec::TST_union : DeclSpec::TST_struct; + SourceLocation StartLoc = ConsumeToken(); + + // Parse the tag portion of this. + DeclTy *TagDecl; + if (ParseTag(TagDecl, TagType, StartLoc)) + return; + + // If there is a body, parse it and inform the actions module. + if (Tok.is(tok::l_brace)) + ParseStructUnionBody(StartLoc, TagType, TagDecl); + + const char *PrevSpec = 0; + if (DS.SetTypeSpecType(TagType, StartLoc, PrevSpec, TagDecl)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); +} + +/// ParseStructDeclaration - Parse a struct declaration without the terminating +/// semicolon. 
+/// +/// struct-declaration: +/// specifier-qualifier-list struct-declarator-list +/// [GNU] __extension__ struct-declaration +/// [GNU] specifier-qualifier-list +/// struct-declarator-list: +/// struct-declarator +/// struct-declarator-list ',' struct-declarator +/// [GNU] struct-declarator-list ',' attributes[opt] struct-declarator +/// struct-declarator: +/// declarator +/// [GNU] declarator attributes[opt] +/// declarator[opt] ':' constant-expression +/// [GNU] declarator[opt] ':' constant-expression attributes[opt] +/// +void Parser::ParseStructDeclaration(DeclTy *TagDecl, + llvm::SmallVectorImpl<DeclTy*> &FieldDecls) { + // FIXME: When __extension__ is specified, disable extension diagnostics. + if (Tok.is(tok::kw___extension__)) + ConsumeToken(); + + // Parse the common specifier-qualifiers-list piece. + DeclSpec DS; + SourceLocation SpecQualLoc = Tok.getLocation(); + ParseSpecifierQualifierList(DS); + // TODO: Does specifier-qualifier list correctly check that *something* is + // specified? + + // If there are no declarators, issue a warning. + if (Tok.is(tok::semi)) { + Diag(SpecQualLoc, diag::w_no_declarators); + return; + } + + // Read struct-declarators until we find the semicolon. + Declarator DeclaratorInfo(DS, Declarator::MemberContext); + + while (1) { + /// struct-declarator: declarator + /// struct-declarator: declarator[opt] ':' constant-expression + if (Tok.isNot(tok::colon)) + ParseDeclarator(DeclaratorInfo); + + ExprTy *BitfieldSize = 0; + if (Tok.is(tok::colon)) { + ConsumeToken(); + ExprResult Res = ParseConstantExpression(); + if (Res.isInvalid) { + SkipUntil(tok::semi, true, true); + } else { + BitfieldSize = Res.Val; + } + } + + // If attributes exist after the declarator, parse them. + if (Tok.is(tok::kw___attribute)) + DeclaratorInfo.AddAttributes(ParseAttributes()); + + // Install the declarator into the current TagDecl. 
+ DeclTy *Field = Actions.ActOnField(CurScope, TagDecl, SpecQualLoc, + DeclaratorInfo, BitfieldSize); + FieldDecls.push_back(Field); + + // If we don't have a comma, it is either the end of the list (a ';') + // or an error, bail out. + if (Tok.isNot(tok::comma)) + return; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. + DeclaratorInfo.clear(); + + // Attributes are only allowed on the second declarator. + if (Tok.is(tok::kw___attribute)) + DeclaratorInfo.AddAttributes(ParseAttributes()); + } +} + +/// ParseStructUnionBody +/// struct-contents: +/// struct-declaration-list +/// [EXT] empty +/// [GNU] "struct-declaration-list" without terminatoring ';' +/// struct-declaration-list: +/// struct-declaration +/// struct-declaration-list struct-declaration +/// [OBC] '@' 'defs' '(' class-name ')' [TODO] +/// +void Parser::ParseStructUnionBody(SourceLocation RecordLoc, + unsigned TagType, DeclTy *TagDecl) { + SourceLocation LBraceLoc = ConsumeBrace(); + + // Empty structs are an extension in C (C99 6.7.2.1p7), but are allowed in + // C++. + if (Tok.is(tok::r_brace)) + Diag(Tok, diag::ext_empty_struct_union_enum, + DeclSpec::getSpecifierName((DeclSpec::TST)TagType)); + + llvm::SmallVector<DeclTy*, 32> FieldDecls; + + // While we still have something to read, read the declarations in the struct. + while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { + // Each iteration of this loop reads one struct-declaration. + + // Check for extraneous top-level semicolon. 
+ if (Tok.is(tok::semi)) { + Diag(Tok, diag::ext_extra_struct_semi); + ConsumeToken(); + continue; + } + ParseStructDeclaration(TagDecl, FieldDecls); + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } else if (Tok.is(tok::r_brace)) { + Diag(Tok.getLocation(), diag::ext_expected_semi_decl_list); + break; + } else { + Diag(Tok, diag::err_expected_semi_decl_list); + // Skip to end of block or statement + SkipUntil(tok::r_brace, true, true); + } + } + + SourceLocation RBraceLoc = MatchRHSPunctuation(tok::r_brace, LBraceLoc); + + Actions.ActOnFields(CurScope, + RecordLoc,TagDecl,&FieldDecls[0],FieldDecls.size(), + LBraceLoc, RBraceLoc); + + AttributeList *AttrList = 0; + // If attributes exist after struct contents, parse them. + if (Tok.is(tok::kw___attribute)) + AttrList = ParseAttributes(); // FIXME: where should I put them? +} + + +/// ParseEnumSpecifier +/// enum-specifier: [C99 6.7.2.2] +/// 'enum' identifier[opt] '{' enumerator-list '}' +/// [C99] 'enum' identifier[opt] '{' enumerator-list ',' '}' +/// [GNU] 'enum' attributes[opt] identifier[opt] '{' enumerator-list ',' [opt] +/// '}' attributes[opt] +/// 'enum' identifier +/// [GNU] 'enum' attributes[opt] identifier +void Parser::ParseEnumSpecifier(DeclSpec &DS) { + assert(Tok.is(tok::kw_enum) && "Not an enum specifier"); + SourceLocation StartLoc = ConsumeToken(); + + // Parse the tag portion of this. + DeclTy *TagDecl; + if (ParseTag(TagDecl, DeclSpec::TST_enum, StartLoc)) + return; + + if (Tok.is(tok::l_brace)) + ParseEnumBody(StartLoc, TagDecl); + + // TODO: semantic analysis on the declspec for enums. + const char *PrevSpec = 0; + if (DS.SetTypeSpecType(DeclSpec::TST_enum, StartLoc, PrevSpec, TagDecl)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); +} + +/// ParseEnumBody - Parse a {} enclosed enumerator-list. 
+/// enumerator-list: +/// enumerator +/// enumerator-list ',' enumerator +/// enumerator: +/// enumeration-constant +/// enumeration-constant '=' constant-expression +/// enumeration-constant: +/// identifier +/// +void Parser::ParseEnumBody(SourceLocation StartLoc, DeclTy *EnumDecl) { + SourceLocation LBraceLoc = ConsumeBrace(); + + // C does not allow an empty enumerator-list, C++ does [dcl.enum]. + if (Tok.is(tok::r_brace) && !getLang().CPlusPlus) + Diag(Tok, diag::ext_empty_struct_union_enum, "enum"); + + llvm::SmallVector<DeclTy*, 32> EnumConstantDecls; + + DeclTy *LastEnumConstDecl = 0; + + // Parse the enumerator-list. + while (Tok.is(tok::identifier)) { + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + SourceLocation IdentLoc = ConsumeToken(); + + SourceLocation EqualLoc; + ExprTy *AssignedVal = 0; + if (Tok.is(tok::equal)) { + EqualLoc = ConsumeToken(); + ExprResult Res = ParseConstantExpression(); + if (Res.isInvalid) + SkipUntil(tok::comma, tok::r_brace, true, true); + else + AssignedVal = Res.Val; + } + + // Install the enumerator constant into EnumDecl. + DeclTy *EnumConstDecl = Actions.ActOnEnumConstant(CurScope, EnumDecl, + LastEnumConstDecl, + IdentLoc, Ident, + EqualLoc, AssignedVal); + EnumConstantDecls.push_back(EnumConstDecl); + LastEnumConstDecl = EnumConstDecl; + + if (Tok.isNot(tok::comma)) + break; + SourceLocation CommaLoc = ConsumeToken(); + + if (Tok.isNot(tok::identifier) && !getLang().C99) + Diag(CommaLoc, diag::ext_c99_enumerator_list_comma); + } + + // Eat the }. + MatchRHSPunctuation(tok::r_brace, LBraceLoc); + + Actions.ActOnEnumBody(StartLoc, EnumDecl, &EnumConstantDecls[0], + EnumConstantDecls.size()); + + DeclTy *AttrList = 0; + // If attributes exist after the identifier list, parse them. + if (Tok.is(tok::kw___attribute)) + AttrList = ParseAttributes(); // FIXME: where do they do? +} + +/// isTypeSpecifierQualifier - Return true if the current token could be the +/// start of a type-qualifier-list. 
+bool Parser::isTypeQualifier() const { + switch (Tok.getKind()) { + default: return false; + // type-qualifier + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: + return true; + } +} + +/// isTypeSpecifierQualifier - Return true if the current token could be the +/// start of a specifier-qualifier-list. +bool Parser::isTypeSpecifierQualifier() const { + switch (Tok.getKind()) { + default: return false; + // GNU attributes support. + case tok::kw___attribute: + // GNU typeof support. + case tok::kw_typeof: + + // type-specifiers + case tok::kw_short: + case tok::kw_long: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw__Complex: + case tok::kw__Imaginary: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_float: + case tok::kw_double: + case tok::kw_bool: + case tok::kw__Bool: + case tok::kw__Decimal32: + case tok::kw__Decimal64: + case tok::kw__Decimal128: + + // struct-or-union-specifier + case tok::kw_struct: + case tok::kw_union: + // enum-specifier + case tok::kw_enum: + + // type-qualifier + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: + return true; + + // typedef-name + case tok::identifier: + return Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope) != 0; + } +} + +/// isDeclarationSpecifier() - Return true if the current token is part of a +/// declaration specifier. 
+bool Parser::isDeclarationSpecifier() const { + switch (Tok.getKind()) { + default: return false; + // storage-class-specifier + case tok::kw_typedef: + case tok::kw_extern: + case tok::kw___private_extern__: + case tok::kw_static: + case tok::kw_auto: + case tok::kw_register: + case tok::kw___thread: + + // type-specifiers + case tok::kw_short: + case tok::kw_long: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw__Complex: + case tok::kw__Imaginary: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_float: + case tok::kw_double: + case tok::kw_bool: + case tok::kw__Bool: + case tok::kw__Decimal32: + case tok::kw__Decimal64: + case tok::kw__Decimal128: + + // struct-or-union-specifier + case tok::kw_struct: + case tok::kw_union: + // enum-specifier + case tok::kw_enum: + + // type-qualifier + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: + + // function-specifier + case tok::kw_inline: + + // GNU typeof support. + case tok::kw_typeof: + + // GNU attributes. + case tok::kw___attribute: + return true; + + // typedef-name + case tok::identifier: + return Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope) != 0; + } +} + + +/// ParseTypeQualifierListOpt +/// type-qualifier-list: [C99 6.7.5] +/// type-qualifier +/// [GNU] attributes +/// type-qualifier-list type-qualifier +/// [GNU] type-qualifier-list attributes +/// +void Parser::ParseTypeQualifierListOpt(DeclSpec &DS) { + while (1) { + int isInvalid = false; + const char *PrevSpec = 0; + SourceLocation Loc = Tok.getLocation(); + + switch (Tok.getKind()) { + default: + // If this is not a type-qualifier token, we're done reading type + // qualifiers. First verify that DeclSpec's are consistent. 
+ DS.Finish(Diags, PP.getSourceManager(), getLang()); + return; + case tok::kw_const: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_const , Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_volatile: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_restrict: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw___attribute: + DS.AddAttributes(ParseAttributes()); + continue; // do *not* consume the next token! + } + + // If the specifier combination wasn't legal, issue a diagnostic. + if (isInvalid) { + assert(PrevSpec && "Method did not return previous specifier!"); + if (isInvalid == 1) // Error. + Diag(Tok, diag::err_invalid_decl_spec_combination, PrevSpec); + else // extwarn. + Diag(Tok, diag::ext_duplicate_declspec, PrevSpec); + } + ConsumeToken(); + } +} + + +/// ParseDeclarator - Parse and verify a newly-initialized declarator. +/// +void Parser::ParseDeclarator(Declarator &D) { + /// This implements the 'declarator' production in the C grammar, then checks + /// for well-formedness and issues diagnostics. + ParseDeclaratorInternal(D); + + // TODO: validate D. + +} + +/// ParseDeclaratorInternal +/// declarator: [C99 6.7.5] +/// pointer[opt] direct-declarator +/// [C++] '&' declarator [C++ 8p4, dcl.decl] +/// [GNU] '&' restrict[opt] attributes[opt] declarator +/// +/// pointer: [C99 6.7.5] +/// '*' type-qualifier-list[opt] +/// '*' type-qualifier-list[opt] pointer +/// +void Parser::ParseDeclaratorInternal(Declarator &D) { + tok::TokenKind Kind = Tok.getKind(); + + // Not a pointer or C++ reference. + if (Kind != tok::star && (Kind != tok::amp || !getLang().CPlusPlus)) + return ParseDirectDeclarator(D); + + // Otherwise, '*' -> pointer or '&' -> reference. + SourceLocation Loc = ConsumeToken(); // Eat the * or &. + + if (Kind == tok::star) { + // Is a pointer. + DeclSpec DS; + + ParseTypeQualifierListOpt(DS); + + // Recursively parse the declarator. 
+ ParseDeclaratorInternal(D); + + // Remember that we parsed a pointer type, and remember the type-quals. + D.AddTypeInfo(DeclaratorChunk::getPointer(DS.getTypeQualifiers(), Loc, + DS.TakeAttributes())); + } else { + // Is a reference + DeclSpec DS; + + // C++ 8.3.2p1: cv-qualified references are ill-formed except when the + // cv-qualifiers are introduced through the use of a typedef or of a + // template type argument, in which case the cv-qualifiers are ignored. + // + // [GNU] Retricted references are allowed. + // [GNU] Attributes on references are allowed. + ParseTypeQualifierListOpt(DS); + + if (DS.getTypeQualifiers() != DeclSpec::TQ_unspecified) { + if (DS.getTypeQualifiers() & DeclSpec::TQ_const) + Diag(DS.getConstSpecLoc(), + diag::err_invalid_reference_qualifier_application, + "const"); + if (DS.getTypeQualifiers() & DeclSpec::TQ_volatile) + Diag(DS.getVolatileSpecLoc(), + diag::err_invalid_reference_qualifier_application, + "volatile"); + } + + // Recursively parse the declarator. + ParseDeclaratorInternal(D); + + // Remember that we parsed a reference type. It doesn't have type-quals. 
+ D.AddTypeInfo(DeclaratorChunk::getReference(DS.getTypeQualifiers(), Loc, + DS.TakeAttributes())); + } +} + +/// ParseDirectDeclarator +/// direct-declarator: [C99 6.7.5] +/// identifier +/// '(' declarator ')' +/// [GNU] '(' attributes declarator ')' +/// [C90] direct-declarator '[' constant-expression[opt] ']' +/// [C99] direct-declarator '[' type-qual-list[opt] assignment-expr[opt] ']' +/// [C99] direct-declarator '[' 'static' type-qual-list[opt] assign-expr ']' +/// [C99] direct-declarator '[' type-qual-list 'static' assignment-expr ']' +/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']' +/// direct-declarator '(' parameter-type-list ')' +/// direct-declarator '(' identifier-list[opt] ')' +/// [GNU] direct-declarator '(' parameter-forward-declarations +/// parameter-type-list[opt] ')' +/// +void Parser::ParseDirectDeclarator(Declarator &D) { + // Parse the first direct-declarator seen. + if (Tok.is(tok::identifier) && D.mayHaveIdentifier()) { + assert(Tok.getIdentifierInfo() && "Not an identifier?"); + D.SetIdentifier(Tok.getIdentifierInfo(), Tok.getLocation()); + ConsumeToken(); + } else if (Tok.is(tok::l_paren)) { + // direct-declarator: '(' declarator ')' + // direct-declarator: '(' attributes declarator ')' + // Example: 'char (*X)' or 'int (*XX)(void)' + ParseParenDeclarator(D); + } else if (D.mayOmitIdentifier()) { + // This could be something simple like "int" (in which case the declarator + // portion is empty), if an abstract-declarator is allowed. + D.SetIdentifier(0, Tok.getLocation()); + } else { + // Expected identifier or '('. 
+ Diag(Tok, diag::err_expected_ident_lparen); + D.SetIdentifier(0, Tok.getLocation()); + } + + assert(D.isPastIdentifier() && + "Haven't past the location of the identifier yet?"); + + while (1) { + if (Tok.is(tok::l_paren)) { + ParseParenDeclarator(D); + } else if (Tok.is(tok::l_square)) { + ParseBracketDeclarator(D); + } else { + break; + } + } +} + +/// ParseParenDeclarator - We parsed the declarator D up to a paren. This may +/// either be before the identifier (in which case these are just grouping +/// parens for precedence) or it may be after the identifier, in which case +/// these are function arguments. +/// +/// This method also handles this portion of the grammar: +/// parameter-type-list: [C99 6.7.5] +/// parameter-list +/// parameter-list ',' '...' +/// +/// parameter-list: [C99 6.7.5] +/// parameter-declaration +/// parameter-list ',' parameter-declaration +/// +/// parameter-declaration: [C99 6.7.5] +/// declaration-specifiers declarator +/// [GNU] declaration-specifiers declarator attributes +/// declaration-specifiers abstract-declarator[opt] +/// [GNU] declaration-specifiers abstract-declarator[opt] attributes +/// +/// identifier-list: [C99 6.7.5] +/// identifier +/// identifier-list ',' identifier +/// +void Parser::ParseParenDeclarator(Declarator &D) { + SourceLocation StartLoc = ConsumeParen(); + + // If we haven't past the identifier yet (or where the identifier would be + // stored, if this is an abstract declarator), then this is probably just + // grouping parens. + if (!D.isPastIdentifier()) { + // Okay, this is probably a grouping paren. However, if this could be an + // abstract-declarator, then this could also be the start of function + // arguments (consider 'void()'). + bool isGrouping; + + if (!D.mayOmitIdentifier()) { + // If this can't be an abstract-declarator, this *must* be a grouping + // paren, because we haven't seen the identifier yet. + isGrouping = true; + } else if (Tok.is(tok::r_paren) || // 'int()' is a function. 
+ isDeclarationSpecifier()) { // 'int(int)' is a function. + // This handles C99 6.7.5.3p11: in "typedef int X; void foo(X)", X is + // considered to be a type, not a K&R identifier-list. + isGrouping = false; + } else { + // Otherwise, this is a grouping paren, e.g. 'int (*X)' or 'int(X)'. + isGrouping = true; + } + + // If this is a grouping paren, handle: + // direct-declarator: '(' declarator ')' + // direct-declarator: '(' attributes declarator ')' + if (isGrouping) { + if (Tok.is(tok::kw___attribute)) + D.AddAttributes(ParseAttributes()); + + ParseDeclaratorInternal(D); + // Match the ')'. + MatchRHSPunctuation(tok::r_paren, StartLoc); + return; + } + + // Okay, if this wasn't a grouping paren, it must be the start of a function + // argument list. Recognize that this declarator will never have an + // identifier (and remember where it would have been), then fall through to + // the handling of argument lists. + D.SetIdentifier(0, Tok.getLocation()); + } + + // Okay, this is the parameter list of a function definition, or it is an + // identifier list of a K&R-style function. + bool IsVariadic; + bool HasPrototype; + bool ErrorEmitted = false; + + // Build up an array of information about the parsed arguments. + llvm::SmallVector<DeclaratorChunk::ParamInfo, 16> ParamInfo; + llvm::SmallSet<const IdentifierInfo*, 16> ParamsSoFar; + + if (Tok.is(tok::r_paren)) { + // int() -> no prototype, no '...'. + IsVariadic = false; + HasPrototype = false; + } else if (Tok.is(tok::identifier) && + // K&R identifier lists can't have typedefs as identifiers, per + // C99 6.7.5.3p11. + !Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope)) { + // Identifier list. Note that '(' identifier-list ')' is only allowed for + // normal declarators, not for abstract-declarators. 
+ assert(D.isPastIdentifier() && "Identifier (if present) must be passed!"); + + // If there was no identifier specified, either we are in an + // abstract-declarator, or we are in a parameter declarator which was found + // to be abstract. In abstract-declarators, identifier lists are not valid, + // diagnose this. + if (!D.getIdentifier()) + Diag(Tok, diag::ext_ident_list_in_param); + + // Remember this identifier in ParamInfo. + ParamInfo.push_back(DeclaratorChunk::ParamInfo(Tok.getIdentifierInfo(), + Tok.getLocation(), 0)); + + ConsumeToken(); + while (Tok.is(tok::comma)) { + // Eat the comma. + ConsumeToken(); + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + ErrorEmitted = true; + break; + } + + IdentifierInfo *ParmII = Tok.getIdentifierInfo(); + + // Verify that the argument identifier has not already been mentioned. + if (!ParamsSoFar.insert(ParmII)) { + Diag(Tok.getLocation(), diag::err_param_redefinition,ParmII->getName()); + ParmII = 0; + } + + // Remember this identifier in ParamInfo. + if (ParmII) + ParamInfo.push_back(DeclaratorChunk::ParamInfo(ParmII, + Tok.getLocation(), 0)); + + // Eat the identifier. + ConsumeToken(); + } + + // K&R 'prototype'. + IsVariadic = false; + HasPrototype = false; + } else { + // Finally, a normal, non-empty parameter type list. + + // Enter function-declaration scope, limiting any declarators for struct + // tags to the function prototype scope. + // FIXME: is this needed? + EnterScope(Scope::DeclScope); + + IsVariadic = false; + while (1) { + if (Tok.is(tok::ellipsis)) { + IsVariadic = true; + + // Check to see if this is "void(...)" which is not allowed. + if (ParamInfo.empty()) { + // Otherwise, parse parameter type list. If it starts with an + // ellipsis, diagnose the malformed function. + Diag(Tok, diag::err_ellipsis_first_arg); + IsVariadic = false; // Treat this like 'void()'. + } + + // Consume the ellipsis. 
+ ConsumeToken(); + break; + } + + SourceLocation DSStart = Tok.getLocation(); + + // Parse the declaration-specifiers. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // Parse the declarator. This is "PrototypeContext", because we must + // accept either 'declarator' or 'abstract-declarator' here. + Declarator ParmDecl(DS, Declarator::PrototypeContext); + ParseDeclarator(ParmDecl); + + // Parse GNU attributes, if present. + if (Tok.is(tok::kw___attribute)) + ParmDecl.AddAttributes(ParseAttributes()); + + // Verify C99 6.7.5.3p2: The only SCS allowed is 'register'. + // NOTE: we could trivially allow 'int foo(auto int X)' if we wanted. + if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified && + DS.getStorageClassSpec() != DeclSpec::SCS_register) { + Diag(DS.getStorageClassSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + if (DS.isThreadSpecified()) { + Diag(DS.getThreadSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + + // Inform the actions module about the parameter declarator, so it gets + // added to the current scope. + Action::TypeResult ParamTy = + Actions.ActOnParamDeclaratorType(CurScope, ParmDecl); + + // Remember this parsed parameter in ParamInfo. + IdentifierInfo *ParmII = ParmDecl.getIdentifier(); + + // Verify that the argument identifier has not already been mentioned. + if (ParmII && !ParamsSoFar.insert(ParmII)) { + Diag(ParmDecl.getIdentifierLoc(), diag::err_param_redefinition, + ParmII->getName()); + ParmII = 0; + } + + // If no parameter was specified, verify that *something* was specified, + // otherwise we have a missing type and identifier. 
+ if (DS.getParsedSpecifiers() == DeclSpec::PQ_None && + ParmDecl.getIdentifier() == 0 && ParmDecl.getNumTypeObjects() == 0) { + Diag(DSStart, diag::err_missing_param); + } else if (!DS.hasTypeSpecifier() && + (getLang().C99 || getLang().CPlusPlus)) { + // Otherwise, if something was specified but a type specifier wasn't, + // (e.g. "x" or "restrict x" or "restrict"), this is a use of implicit + // int. This is valid in C90, but not in C99 or C++. + if (ParmII) + Diag(ParmDecl.getIdentifierLoc(), + diag::ext_param_requires_type_specifier, ParmII->getName()); + else + Diag(DSStart, diag::ext_anon_param_requires_type_specifier); + } + + ParamInfo.push_back(DeclaratorChunk::ParamInfo(ParmII, + ParmDecl.getIdentifierLoc(), ParamTy.Val, ParmDecl.getInvalidType(), + ParmDecl.getDeclSpec().TakeAttributes())); + + // If the next token is a comma, consume it and keep reading arguments. + if (Tok.isNot(tok::comma)) break; + + // Consume the comma. + ConsumeToken(); + } + + HasPrototype = true; + + // Leave prototype scope. + ExitScope(); + } + + // Remember that we parsed a function type, and remember the attributes. + if (!ErrorEmitted) + D.AddTypeInfo(DeclaratorChunk::getFunction(HasPrototype, IsVariadic, + &ParamInfo[0], ParamInfo.size(), + StartLoc)); + + // If we have the closing ')', eat it and we're done. + if (Tok.is(tok::r_paren)) { + ConsumeParen(); + } else { + // If an error happened earlier parsing something else in the proto, don't + // issue another error. 
+ if (!ErrorEmitted) + Diag(Tok, diag::err_expected_rparen); + SkipUntil(tok::r_paren); + } +} + + +/// [C90] direct-declarator '[' constant-expression[opt] ']' +/// [C99] direct-declarator '[' type-qual-list[opt] assignment-expr[opt] ']' +/// [C99] direct-declarator '[' 'static' type-qual-list[opt] assign-expr ']' +/// [C99] direct-declarator '[' type-qual-list 'static' assignment-expr ']' +/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']' +void Parser::ParseBracketDeclarator(Declarator &D) { + SourceLocation StartLoc = ConsumeBracket(); + + // If valid, this location is the position where we read the 'static' keyword. + SourceLocation StaticLoc; + if (Tok.is(tok::kw_static)) + StaticLoc = ConsumeToken(); + + // If there is a type-qualifier-list, read it now. + DeclSpec DS; + ParseTypeQualifierListOpt(DS); + + // If we haven't already read 'static', check to see if there is one after the + // type-qualifier-list. + if (!StaticLoc.isValid() && Tok.is(tok::kw_static)) + StaticLoc = ConsumeToken(); + + // Handle "direct-declarator [ type-qual-list[opt] * ]". + bool isStar = false; + ExprResult NumElements(false); + if (Tok.is(tok::star)) { + // Remember the '*' token, in case we have to un-get it. + Token StarTok = Tok; + ConsumeToken(); + + // Check that the ']' token is present to avoid incorrectly parsing + // expressions starting with '*' as [*]. + if (Tok.is(tok::r_square)) { + if (StaticLoc.isValid()) + Diag(StaticLoc, diag::err_unspecified_vla_size_with_static); + StaticLoc = SourceLocation(); // Drop the static. + isStar = true; + } else { + // Otherwise, the * must have been some expression (such as '*ptr') that + // started an assignment-expr. We already consumed the token, but now we + // need to reparse it. This handles cases like 'X[*p + 4]' + NumElements = ParseAssignmentExpressionWithLeadingStar(StarTok); + } + } else if (Tok.isNot(tok::r_square)) { + // Parse the assignment-expression now. 
+ NumElements = ParseAssignmentExpression(); + } + + // If there was an error parsing the assignment-expression, recover. + if (NumElements.isInvalid) { + // If the expression was invalid, skip it. + SkipUntil(tok::r_square); + return; + } + + MatchRHSPunctuation(tok::r_square, StartLoc); + + // If C99 isn't enabled, emit an ext-warn if the arg list wasn't empty and if + // it was not a constant expression. + if (!getLang().C99) { + // TODO: check C90 array constant exprness. + if (isStar || StaticLoc.isValid() || + 0/*TODO: NumElts is not a C90 constantexpr */) + Diag(StartLoc, diag::ext_c99_array_usage); + } + + // Remember that we parsed a pointer type, and remember the type-quals. + D.AddTypeInfo(DeclaratorChunk::getArray(DS.getTypeQualifiers(), + StaticLoc.isValid(), isStar, + NumElements.Val, StartLoc)); +} + +/// [GNU] typeof-specifier: +/// typeof ( expressions ) +/// typeof ( type-name ) +/// +void Parser::ParseTypeofSpecifier(DeclSpec &DS) { + assert(Tok.is(tok::kw_typeof) && "Not a typeof specifier"); + const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); + SourceLocation StartLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, BuiltinII->getName()); + return; + } + SourceLocation LParenLoc = ConsumeParen(), RParenLoc; + + if (isTypeSpecifierQualifier()) { + TypeTy *Ty = ParseTypeName(); + + assert(Ty && "Parser::ParseTypeofSpecifier(): missing type"); + + if (Tok.isNot(tok::r_paren)) { + MatchRHSPunctuation(tok::r_paren, LParenLoc); + return; + } + RParenLoc = ConsumeParen(); + const char *PrevSpec = 0; + // Check for duplicate type specifiers (e.g. "int typeof(int)"). + if (DS.SetTypeSpecType(DeclSpec::TST_typeofType, StartLoc, PrevSpec, Ty)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); + } else { // we have an expression. 
+ ExprResult Result = ParseExpression(); + + if (Result.isInvalid || Tok.isNot(tok::r_paren)) { + MatchRHSPunctuation(tok::r_paren, LParenLoc); + return; + } + RParenLoc = ConsumeParen(); + const char *PrevSpec = 0; + // Check for duplicate type specifiers (e.g. "int typeof(int)"). + if (DS.SetTypeSpecType(DeclSpec::TST_typeofExpr, StartLoc, PrevSpec, + Result.Val)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); + } +} + diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp new file mode 100644 index 00000000000..46dcb574819 --- /dev/null +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -0,0 +1,119 @@ +//===--- ParseDeclCXX.cpp - C++ Declaration Parsing -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the C++ Declaration portions of the Parser interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/Scope.h" +#include "clang/Basic/Diagnostic.h" +using namespace clang; + +/// ParseNamespace - We know that the current token is a namespace keyword. This +/// may either be a top level namespace or a block-level namespace alias. 
+/// +/// namespace-definition: [C++ 7.3: basic.namespace] +/// named-namespace-definition +/// unnamed-namespace-definition +/// +/// unnamed-namespace-definition: +/// 'namespace' attributes[opt] '{' namespace-body '}' +/// +/// named-namespace-definition: +/// original-namespace-definition +/// extension-namespace-definition +/// +/// original-namespace-definition: +/// 'namespace' identifier attributes[opt] '{' namespace-body '}' +/// +/// extension-namespace-definition: +/// 'namespace' original-namespace-name '{' namespace-body '}' +/// +/// namespace-alias-definition: [C++ 7.3.2: namespace.alias] +/// 'namespace' identifier '=' qualified-namespace-specifier ';' +/// +Parser::DeclTy *Parser::ParseNamespace(unsigned Context) { + assert(Tok.is(tok::kw_namespace) && "Not a namespace!"); + SourceLocation NamespaceLoc = ConsumeToken(); // eat the 'namespace'. + + SourceLocation IdentLoc; + IdentifierInfo *Ident = 0; + + if (Tok.is(tok::identifier)) { + Ident = Tok.getIdentifierInfo(); + IdentLoc = ConsumeToken(); // eat the identifier. + } + + // Read label attributes, if present. + DeclTy *AttrList = 0; + if (Tok.is(tok::kw___attribute)) + // FIXME: save these somewhere. + AttrList = ParseAttributes(); + + if (Tok.is(tok::equal)) { + // FIXME: Verify no attributes were present. + // FIXME: parse this. + } else if (Tok.is(tok::l_brace)) { + SourceLocation LBrace = ConsumeBrace(); + // FIXME: push a scope, push a namespace decl. + + while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { + // FIXME capture the decls. + ParseExternalDeclaration(); + } + + SourceLocation RBrace = MatchRHSPunctuation(tok::r_brace, LBrace); + + // FIXME: act on this. + } else { + unsigned D = Ident ? diag::err_expected_lbrace : + diag::err_expected_ident_lbrace; + Diag(Tok.getLocation(), D); + } + + return 0; +} + +/// ParseLinkage - We know that the current token is a string_literal +/// and just before that, that extern was seen. 
+/// +/// linkage-specification: [C++ 7.5p2: dcl.link] +/// 'extern' string-literal '{' declaration-seq[opt] '}' +/// 'extern' string-literal declaration +/// +Parser::DeclTy *Parser::ParseLinkage(unsigned Context) { + assert(Tok.is(tok::string_literal) && "Not a stringliteral!"); + llvm::SmallVector<char, 8> LangBuffer; + // LangBuffer is guaranteed to be big enough. + LangBuffer.resize(Tok.getLength()); + const char *LangBufPtr = &LangBuffer[0]; + unsigned StrSize = PP.getSpelling(Tok, LangBufPtr); + + SourceLocation Loc = ConsumeStringToken(); + DeclTy *D = 0; + SourceLocation LBrace, RBrace; + + if (Tok.isNot(tok::l_brace)) { + D = ParseDeclaration(Context); + } else { + LBrace = ConsumeBrace(); + while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { + // FIXME capture the decls. + D = ParseExternalDeclaration(); + } + + RBrace = MatchRHSPunctuation(tok::r_brace, LBrace); + } + + if (!D) + return 0; + + return Actions.ActOnLinkageSpec(Loc, LBrace, RBrace, LangBufPtr, StrSize, D); +} diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp new file mode 100644 index 00000000000..46714b73ea7 --- /dev/null +++ b/clang/lib/Parse/ParseExpr.cpp @@ -0,0 +1,1081 @@ +//===--- ParseExpr.cpp - Expression Parsing -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Expression parsing implementation. Expressions in +// C99 basically consist of a bunch of binary operators with unary operators and +// other random stuff at the leaves. +// +// In the C99 grammar, these unary operators bind tightest and are represented +// as the 'cast-expression' production. Everything else is either a binary +// operator (e.g. '/') or a ternary operator ("?:"). 
The unary leaves are +// handled by ParseCastExpression, the higher level pieces are handled by +// ParseBinaryExpression. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +/// PrecedenceLevels - These are precedences for the binary/ternary operators in +/// the C99 grammar. These have been named to relate with the C99 grammar +/// productions. Low precedences numbers bind more weakly than high numbers. +namespace prec { + enum Level { + Unknown = 0, // Not binary operator. + Comma = 1, // , + Assignment = 2, // =, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |= + Conditional = 3, // ? + LogicalOr = 4, // || + LogicalAnd = 5, // && + InclusiveOr = 6, // | + ExclusiveOr = 7, // ^ + And = 8, // & + Equality = 9, // ==, != + Relational = 10, // >=, <=, >, < + Shift = 11, // <<, >> + Additive = 12, // -, + + Multiplicative = 13 // *, /, % + }; +} + + +/// getBinOpPrecedence - Return the precedence of the specified binary operator +/// token. 
This returns: +/// +static prec::Level getBinOpPrecedence(tok::TokenKind Kind) { + switch (Kind) { + default: return prec::Unknown; + case tok::comma: return prec::Comma; + case tok::equal: + case tok::starequal: + case tok::slashequal: + case tok::percentequal: + case tok::plusequal: + case tok::minusequal: + case tok::lesslessequal: + case tok::greatergreaterequal: + case tok::ampequal: + case tok::caretequal: + case tok::pipeequal: return prec::Assignment; + case tok::question: return prec::Conditional; + case tok::pipepipe: return prec::LogicalOr; + case tok::ampamp: return prec::LogicalAnd; + case tok::pipe: return prec::InclusiveOr; + case tok::caret: return prec::ExclusiveOr; + case tok::amp: return prec::And; + case tok::exclaimequal: + case tok::equalequal: return prec::Equality; + case tok::lessequal: + case tok::less: + case tok::greaterequal: + case tok::greater: return prec::Relational; + case tok::lessless: + case tok::greatergreater: return prec::Shift; + case tok::plus: + case tok::minus: return prec::Additive; + case tok::percent: + case tok::slash: + case tok::star: return prec::Multiplicative; + } +} + + +/// ParseExpression - Simple precedence-based parser for binary/ternary +/// operators. +/// +/// Note: we diverge from the C99 grammar when parsing the assignment-expression +/// production. C99 specifies that the LHS of an assignment operator should be +/// parsed as a unary-expression, but consistency dictates that it be a +/// conditional-expession. In practice, the important thing here is that the +/// LHS of an assignment has to be an l-value, which productions between +/// unary-expression and conditional-expression don't produce. Because we want +/// consistency, we parse the LHS as a conditional-expression, then check for +/// l-value-ness in semantic analysis stages. 
+/// +/// multiplicative-expression: [C99 6.5.5] +/// cast-expression +/// multiplicative-expression '*' cast-expression +/// multiplicative-expression '/' cast-expression +/// multiplicative-expression '%' cast-expression +/// +/// additive-expression: [C99 6.5.6] +/// multiplicative-expression +/// additive-expression '+' multiplicative-expression +/// additive-expression '-' multiplicative-expression +/// +/// shift-expression: [C99 6.5.7] +/// additive-expression +/// shift-expression '<<' additive-expression +/// shift-expression '>>' additive-expression +/// +/// relational-expression: [C99 6.5.8] +/// shift-expression +/// relational-expression '<' shift-expression +/// relational-expression '>' shift-expression +/// relational-expression '<=' shift-expression +/// relational-expression '>=' shift-expression +/// +/// equality-expression: [C99 6.5.9] +/// relational-expression +/// equality-expression '==' relational-expression +/// equality-expression '!=' relational-expression +/// +/// AND-expression: [C99 6.5.10] +/// equality-expression +/// AND-expression '&' equality-expression +/// +/// exclusive-OR-expression: [C99 6.5.11] +/// AND-expression +/// exclusive-OR-expression '^' AND-expression +/// +/// inclusive-OR-expression: [C99 6.5.12] +/// exclusive-OR-expression +/// inclusive-OR-expression '|' exclusive-OR-expression +/// +/// logical-AND-expression: [C99 6.5.13] +/// inclusive-OR-expression +/// logical-AND-expression '&&' inclusive-OR-expression +/// +/// logical-OR-expression: [C99 6.5.14] +/// logical-AND-expression +/// logical-OR-expression '||' logical-AND-expression +/// +/// conditional-expression: [C99 6.5.15] +/// logical-OR-expression +/// logical-OR-expression '?' expression ':' conditional-expression +/// [GNU] logical-OR-expression '?' 
':' conditional-expression +/// +/// assignment-expression: [C99 6.5.16] +/// conditional-expression +/// unary-expression assignment-operator assignment-expression +/// [C++] throw-expression [C++ 15] +/// +/// assignment-operator: one of +/// = *= /= %= += -= <<= >>= &= ^= |= +/// +/// expression: [C99 6.5.17] +/// assignment-expression +/// expression ',' assignment-expression +/// +Parser::ExprResult Parser::ParseExpression() { + if (Tok.is(tok::kw_throw)) + return ParseThrowExpression(); + + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Comma); +} + +/// This routine is called when the '@' is seen and consumed. +/// Current token is an Identifier and is not a 'try'. This +/// routine is necessary to disambiguate @try-statement from, +/// for example, @encode-expression. +/// +Parser::ExprResult Parser::ParseExpressionWithLeadingAt(SourceLocation AtLoc) { + ExprResult LHS = ParseObjCAtExpression(AtLoc); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Comma); +} + +/// ParseAssignmentExpression - Parse an expr that doesn't include commas. +/// +Parser::ExprResult Parser::ParseAssignmentExpression() { + if (Tok.is(tok::kw_throw)) + return ParseThrowExpression(); + + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Assignment); +} + +Parser::ExprResult Parser::ParseConstantExpression() { + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Conditional); +} + +/// ParseExpressionWithLeadingIdentifier - This special purpose method is used +/// in contexts where we have already consumed an identifier (which we saved in +/// 'IdTok'), then discovered that the identifier was really the leading token +/// of part of an expression. 
For example, in "A[1]+B", we consumed "A" (which +/// is now in 'IdTok') and the current token is "[". +Parser::ExprResult Parser:: +ParseExpressionWithLeadingIdentifier(const Token &IdTok) { + // We know that 'IdTok' must correspond to this production: + // primary-expression: identifier + + // Let the actions module handle the identifier. + ExprResult Res = Actions.ActOnIdentifierExpr(CurScope, IdTok.getLocation(), + *IdTok.getIdentifierInfo(), + Tok.is(tok::l_paren)); + + // Because we have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops), we + // need to handle the 'postfix-expression' rules. We do this by invoking + // ParsePostfixExpressionSuffix to consume any postfix-expression suffixes: + Res = ParsePostfixExpressionSuffix(Res); + if (Res.isInvalid) return Res; + + // At this point, the "A[1]" part of "A[1]+B" has been consumed. Once this is + // done, we know we don't have to do anything for cast-expression, because the + // only non-postfix-expression production starts with a '(' token, and we know + // we have an identifier. As such, we can invoke ParseRHSOfBinaryExpression + // to consume any trailing operators (e.g. "+" in this example) and connected + // chunks of the expression. + return ParseRHSOfBinaryExpression(Res, prec::Comma); +} + +/// ParseExpressionWithLeadingIdentifier - This special purpose method is used +/// in contexts where we have already consumed an identifier (which we saved in +/// 'IdTok'), then discovered that the identifier was really the leading token +/// of part of an assignment-expression. For example, in "A[1]+B", we consumed +/// "A" (which is now in 'IdTok') and the current token is "[". +Parser::ExprResult Parser:: +ParseAssignmentExprWithLeadingIdentifier(const Token &IdTok) { + // We know that 'IdTok' must correspond to this production: + // primary-expression: identifier + + // Let the actions module handle the identifier. 
+ ExprResult Res = Actions.ActOnIdentifierExpr(CurScope, IdTok.getLocation(), + *IdTok.getIdentifierInfo(), + Tok.is(tok::l_paren)); + + // Because we have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops), we + // need to handle the 'postfix-expression' rules. We do this by invoking + // ParsePostfixExpressionSuffix to consume any postfix-expression suffixes: + Res = ParsePostfixExpressionSuffix(Res); + if (Res.isInvalid) return Res; + + // At this point, the "A[1]" part of "A[1]+B" has been consumed. Once this is + // done, we know we don't have to do anything for cast-expression, because the + // only non-postfix-expression production starts with a '(' token, and we know + // we have an identifier. As such, we can invoke ParseRHSOfBinaryExpression + // to consume any trailing operators (e.g. "+" in this example) and connected + // chunks of the expression. + return ParseRHSOfBinaryExpression(Res, prec::Assignment); +} + + +/// ParseAssignmentExpressionWithLeadingStar - This special purpose method is +/// used in contexts where we have already consumed a '*' (which we saved in +/// 'StarTok'), then discovered that the '*' was really the leading token of an +/// expression. For example, in "*(int*)P+B", we consumed "*" (which is +/// now in 'StarTok') and the current token is "(". +Parser::ExprResult Parser:: +ParseAssignmentExpressionWithLeadingStar(const Token &StarTok) { + // We know that 'StarTok' must correspond to this production: + // unary-expression: unary-operator cast-expression + // where 'unary-operator' is '*'. + + // Parse the cast-expression that follows the '*'. This will parse the + // "*(int*)P" part of "*(int*)P+B". + ExprResult Res = ParseCastExpression(false); + if (Res.isInvalid) return Res; + + // Combine StarTok + Res to get the new AST for the combined expression.. 
+ Res = Actions.ActOnUnaryOp(StarTok.getLocation(), tok::star, Res.Val); + if (Res.isInvalid) return Res; + + + // We have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops). Since + // we know that the only production above us is the cast-expression + // production, and because the only alternative productions start with a '(' + // token (we know we had a '*'), there is no work to do to get a whole + // cast-expression. + + // At this point, the "*(int*)P" part of "*(int*)P+B" has been consumed. Once + // this is done, we can invoke ParseRHSOfBinaryExpression to consume any + // trailing operators (e.g. "+" in this example) and connected chunks of the + // assignment-expression. + return ParseRHSOfBinaryExpression(Res, prec::Assignment); +} + + +/// ParseRHSOfBinaryExpression - Parse a binary expression that starts with +/// LHS and has a precedence of at least MinPrec. +Parser::ExprResult +Parser::ParseRHSOfBinaryExpression(ExprResult LHS, unsigned MinPrec) { + unsigned NextTokPrec = getBinOpPrecedence(Tok.getKind()); + SourceLocation ColonLoc; + + while (1) { + // If this token has a lower precedence than we are allowed to parse (e.g. + // because we are called recursively, or because the token is not a binop), + // then we are done! + if (NextTokPrec < MinPrec) + return LHS; + + // Consume the operator, saving the operator token for error reporting. + Token OpToken = Tok; + ConsumeToken(); + + // Special case handling for the ternary operator. + ExprResult TernaryMiddle(true); + if (NextTokPrec == prec::Conditional) { + if (Tok.isNot(tok::colon)) { + // Handle this production specially: + // logical-OR-expression '?' expression ':' conditional-expression + // In particular, the RHS of the '?' is 'expression', not + // 'logical-OR-expression' as we might expect. 
+ TernaryMiddle = ParseExpression(); + if (TernaryMiddle.isInvalid) { + Actions.DeleteExpr(LHS.Val); + return TernaryMiddle; + } + } else { + // Special case handling of "X ? Y : Z" where Y is empty: + // logical-OR-expression '?' ':' conditional-expression [GNU] + TernaryMiddle = ExprResult(false); + Diag(Tok, diag::ext_gnu_conditional_expr); + } + + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon); + Diag(OpToken, diag::err_matching, "?"); + Actions.DeleteExpr(LHS.Val); + Actions.DeleteExpr(TernaryMiddle.Val); + return ExprResult(true); + } + + // Eat the colon. + ColonLoc = ConsumeToken(); + } + + // Parse another leaf here for the RHS of the operator. + ExprResult RHS = ParseCastExpression(false); + if (RHS.isInvalid) { + Actions.DeleteExpr(LHS.Val); + Actions.DeleteExpr(TernaryMiddle.Val); + return RHS; + } + + // Remember the precedence of this operator and get the precedence of the + // operator immediately to the right of the RHS. + unsigned ThisPrec = NextTokPrec; + NextTokPrec = getBinOpPrecedence(Tok.getKind()); + + // Assignment and conditional expressions are right-associative. + bool isRightAssoc = ThisPrec == prec::Conditional || + ThisPrec == prec::Assignment; + + // Get the precedence of the operator to the right of the RHS. If it binds + // more tightly with RHS than we do, evaluate it completely first. + if (ThisPrec < NextTokPrec || + (ThisPrec == NextTokPrec && isRightAssoc)) { + // If this is left-associative, only parse things on the RHS that bind + // more tightly than the current operator. If it is left-associative, it + // is okay, to bind exactly as tightly. For example, compile A=B=C=D as + // A=(B=(C=D)), where each paren is a level of recursion here. 
+ RHS = ParseRHSOfBinaryExpression(RHS, ThisPrec + !isRightAssoc); + if (RHS.isInvalid) { + Actions.DeleteExpr(LHS.Val); + Actions.DeleteExpr(TernaryMiddle.Val); + return RHS; + } + + NextTokPrec = getBinOpPrecedence(Tok.getKind()); + } + assert(NextTokPrec <= ThisPrec && "Recursion didn't work!"); + + if (!LHS.isInvalid) { + // Combine the LHS and RHS into the LHS (e.g. build AST). + if (TernaryMiddle.isInvalid) + LHS = Actions.ActOnBinOp(OpToken.getLocation(), OpToken.getKind(), + LHS.Val, RHS.Val); + else + LHS = Actions.ActOnConditionalOp(OpToken.getLocation(), ColonLoc, + LHS.Val, TernaryMiddle.Val, RHS.Val); + } else { + // We had a semantic error on the LHS. Just free the RHS and continue. + Actions.DeleteExpr(TernaryMiddle.Val); + Actions.DeleteExpr(RHS.Val); + } + } +} + +/// ParseCastExpression - Parse a cast-expression, or, if isUnaryExpression is +/// true, parse a unary-expression. +/// +/// cast-expression: [C99 6.5.4] +/// unary-expression +/// '(' type-name ')' cast-expression +/// +/// unary-expression: [C99 6.5.3] +/// postfix-expression +/// '++' unary-expression +/// '--' unary-expression +/// unary-operator cast-expression +/// 'sizeof' unary-expression +/// 'sizeof' '(' type-name ')' +/// [GNU] '__alignof' unary-expression +/// [GNU] '__alignof' '(' type-name ')' +/// [GNU] '&&' identifier +/// +/// unary-operator: one of +/// '&' '*' '+' '-' '~' '!' 
+/// [GNU] '__extension__' '__real' '__imag' +/// +/// primary-expression: [C99 6.5.1] +/// identifier +/// constant +/// string-literal +/// [C++] boolean-literal [C++ 2.13.5] +/// '(' expression ')' +/// '__func__' [C99 6.4.2.2] +/// [GNU] '__FUNCTION__' +/// [GNU] '__PRETTY_FUNCTION__' +/// [GNU] '(' compound-statement ')' +/// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' +/// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' +/// [GNU] '__builtin_choose_expr' '(' assign-expr ',' assign-expr ',' +/// assign-expr ')' +/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' +/// [OBJC] '[' objc-message-expr ']' +/// [OBJC] '@selector' '(' objc-selector-arg ')' +/// [OBJC] '@protocol' '(' identifier ')' +/// [OBJC] '@encode' '(' type-name ')' +/// [OBJC] objc-string-literal +/// [C++] 'const_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'dynamic_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'reinterpret_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'static_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// +/// constant: [C99 6.4.4] +/// integer-constant +/// floating-constant +/// enumeration-constant -> identifier +/// character-constant +/// +Parser::ExprResult Parser::ParseCastExpression(bool isUnaryExpression) { + ExprResult Res; + tok::TokenKind SavedKind = Tok.getKind(); + + // This handles all of cast-expression, unary-expression, postfix-expression, + // and primary-expression. We handle them together like this for efficiency + // and to simplify handling of an expression starting with a '(' token: which + // may be one of a parenthesized expression, cast-expression, compound literal + // expression, or statement expression. + // + // If the parsed tokens consist of a primary-expression, the cases below + // call ParsePostfixExpressionSuffix to handle the postfix expression + // suffixes. 
Cases that cannot be followed by postfix exprs should + // return without invoking ParsePostfixExpressionSuffix. + switch (SavedKind) { + case tok::l_paren: { + // If this expression is limited to being a unary-expression, the parent can + // not start a cast expression. + ParenParseOption ParenExprType = + isUnaryExpression ? CompoundLiteral : CastExpr; + TypeTy *CastTy; + SourceLocation LParenLoc = Tok.getLocation(); + SourceLocation RParenLoc; + Res = ParseParenExpression(ParenExprType, CastTy, RParenLoc); + if (Res.isInvalid) return Res; + + switch (ParenExprType) { + case SimpleExpr: break; // Nothing else to do. + case CompoundStmt: break; // Nothing else to do. + case CompoundLiteral: + // We parsed '(' type-name ')' '{' ... '}'. If any suffixes of + // postfix-expression exist, parse them now. + break; + case CastExpr: + // We parsed '(' type-name ')' and the thing after it wasn't a '{'. Parse + // the cast-expression that follows it next. + // TODO: For cast expression with CastTy. + Res = ParseCastExpression(false); + if (!Res.isInvalid) + Res = Actions.ActOnCastExpr(LParenLoc, CastTy, RParenLoc, Res.Val); + return Res; + } + + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + } + + // primary-expression + case tok::numeric_constant: + // constant: integer-constant + // constant: floating-constant + + Res = Actions.ActOnNumericConstant(Tok); + ConsumeToken(); + + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + + case tok::kw_true: + case tok::kw_false: + return ParseCXXBoolLiteral(); + + case tok::identifier: { // primary-expression: identifier + // constant: enumeration-constant + // Consume the identifier so that we can see if it is followed by a '('. + // Function designators are allowed to be undeclared (C99 6.5.1p2), so we + // need to know whether or not this identifier is a function designator or + // not. 
+ IdentifierInfo &II = *Tok.getIdentifierInfo(); + SourceLocation L = ConsumeToken(); + Res = Actions.ActOnIdentifierExpr(CurScope, L, II, Tok.is(tok::l_paren)); + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + } + case tok::char_constant: // constant: character-constant + Res = Actions.ActOnCharacterConstant(Tok); + ConsumeToken(); + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2] + case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU] + case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU] + Res = Actions.ActOnPreDefinedExpr(Tok.getLocation(), SavedKind); + ConsumeToken(); + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + case tok::string_literal: // primary-expression: string-literal + case tok::wide_string_literal: + Res = ParseStringLiteralExpression(); + if (Res.isInvalid) return Res; + // This can be followed by postfix-expr pieces (e.g. "foo"[1]). 
+ return ParsePostfixExpressionSuffix(Res); + case tok::kw___builtin_va_arg: + case tok::kw___builtin_offsetof: + case tok::kw___builtin_choose_expr: + case tok::kw___builtin_overload: + case tok::kw___builtin_types_compatible_p: + return ParseBuiltinPrimaryExpression(); + case tok::plusplus: // unary-expression: '++' unary-expression + case tok::minusminus: { // unary-expression: '--' unary-expression + SourceLocation SavedLoc = ConsumeToken(); + Res = ParseCastExpression(true); + if (!Res.isInvalid) + Res = Actions.ActOnUnaryOp(SavedLoc, SavedKind, Res.Val); + return Res; + } + case tok::amp: // unary-expression: '&' cast-expression + case tok::star: // unary-expression: '*' cast-expression + case tok::plus: // unary-expression: '+' cast-expression + case tok::minus: // unary-expression: '-' cast-expression + case tok::tilde: // unary-expression: '~' cast-expression + case tok::exclaim: // unary-expression: '!' cast-expression + case tok::kw___real: // unary-expression: '__real' cast-expression [GNU] + case tok::kw___imag: { // unary-expression: '__imag' cast-expression [GNU] + SourceLocation SavedLoc = ConsumeToken(); + Res = ParseCastExpression(false); + if (!Res.isInvalid) + Res = Actions.ActOnUnaryOp(SavedLoc, SavedKind, Res.Val); + return Res; + } + + case tok::kw___extension__:{//unary-expression:'__extension__' cast-expr [GNU] + // __extension__ silences extension warnings in the subexpression. 
+ bool SavedExtWarn = Diags.getWarnOnExtensions(); + Diags.setWarnOnExtensions(false); + SourceLocation SavedLoc = ConsumeToken(); + Res = ParseCastExpression(false); + if (!Res.isInvalid) + Res = Actions.ActOnUnaryOp(SavedLoc, SavedKind, Res.Val); + Diags.setWarnOnExtensions(SavedExtWarn); + return Res; + } + case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression + // unary-expression: 'sizeof' '(' type-name ')' + case tok::kw___alignof: // unary-expression: '__alignof' unary-expression + // unary-expression: '__alignof' '(' type-name ')' + return ParseSizeofAlignofExpression(); + case tok::ampamp: { // unary-expression: '&&' identifier + SourceLocation AmpAmpLoc = ConsumeToken(); + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return ExprResult(true); + } + + Diag(AmpAmpLoc, diag::ext_gnu_address_of_label); + Res = Actions.ActOnAddrLabel(AmpAmpLoc, Tok.getLocation(), + Tok.getIdentifierInfo()); + ConsumeToken(); + return Res; + } + case tok::kw_const_cast: + case tok::kw_dynamic_cast: + case tok::kw_reinterpret_cast: + case tok::kw_static_cast: + return ParseCXXCasts(); + case tok::at: { + SourceLocation AtLoc = ConsumeToken(); + return ParseObjCAtExpression(AtLoc); + } + case tok::l_square: + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(ParseObjCMessageExpression()); + default: + Diag(Tok, diag::err_expected_expression); + return ExprResult(true); + } + + // unreachable. + abort(); +} + +/// ParsePostfixExpressionSuffix - Once the leading part of a postfix-expression +/// is parsed, this method parses any suffixes that apply. +/// +/// postfix-expression: [C99 6.5.2] +/// primary-expression +/// postfix-expression '[' expression ']' +/// postfix-expression '(' argument-expression-list[opt] ')' +/// postfix-expression '.' 
identifier +/// postfix-expression '->' identifier +/// postfix-expression '++' +/// postfix-expression '--' +/// '(' type-name ')' '{' initializer-list '}' +/// '(' type-name ')' '{' initializer-list ',' '}' +/// +/// argument-expression-list: [C99 6.5.2] +/// argument-expression +/// argument-expression-list ',' assignment-expression +/// +Parser::ExprResult Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { + + // Now that the primary-expression piece of the postfix-expression has been + // parsed, see if there are any postfix-expression pieces here. + SourceLocation Loc; + while (1) { + switch (Tok.getKind()) { + default: // Not a postfix-expression suffix. + return LHS; + case tok::l_square: { // postfix-expression: p-e '[' expression ']' + Loc = ConsumeBracket(); + ExprResult Idx = ParseExpression(); + + SourceLocation RLoc = Tok.getLocation(); + + if (!LHS.isInvalid && !Idx.isInvalid && Tok.is(tok::r_square)) + LHS = Actions.ActOnArraySubscriptExpr(LHS.Val, Loc, Idx.Val, RLoc); + else + LHS = ExprResult(true); + + // Match the ']'. + MatchRHSPunctuation(tok::r_square, Loc); + break; + } + + case tok::l_paren: { // p-e: p-e '(' argument-expression-list[opt] ')' + llvm::SmallVector<ExprTy*, 8> ArgExprs; + llvm::SmallVector<SourceLocation, 8> CommaLocs; + + Loc = ConsumeParen(); + + if (Tok.isNot(tok::r_paren)) { + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + SkipUntil(tok::r_paren); + return ExprResult(true); + } else + ArgExprs.push_back(ArgExpr.Val); + + if (Tok.isNot(tok::comma)) + break; + // Move to the next argument, remember where the comma was. + CommaLocs.push_back(ConsumeToken()); + } + } + + // Match the ')'. 
+ if (!LHS.isInvalid && Tok.is(tok::r_paren)) { + assert((ArgExprs.size() == 0 || ArgExprs.size()-1 == CommaLocs.size())&& + "Unexpected number of commas!"); + LHS = Actions.ActOnCallExpr(LHS.Val, Loc, &ArgExprs[0], ArgExprs.size(), + &CommaLocs[0], Tok.getLocation()); + } + + MatchRHSPunctuation(tok::r_paren, Loc); + break; + } + case tok::arrow: // postfix-expression: p-e '->' identifier + case tok::period: { // postfix-expression: p-e '.' identifier + tok::TokenKind OpKind = Tok.getKind(); + SourceLocation OpLoc = ConsumeToken(); // Eat the "." or "->" token. + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return ExprResult(true); + } + + if (!LHS.isInvalid) + LHS = Actions.ActOnMemberReferenceExpr(LHS.Val, OpLoc, OpKind, + Tok.getLocation(), + *Tok.getIdentifierInfo()); + ConsumeToken(); + break; + } + case tok::plusplus: // postfix-expression: postfix-expression '++' + case tok::minusminus: // postfix-expression: postfix-expression '--' + if (!LHS.isInvalid) + LHS = Actions.ActOnPostfixUnaryOp(Tok.getLocation(), Tok.getKind(), + LHS.Val); + ConsumeToken(); + break; + } + } +} + + +/// ParseSizeofAlignofExpression - Parse a sizeof or alignof expression. +/// unary-expression: [C99 6.5.3] +/// 'sizeof' unary-expression +/// 'sizeof' '(' type-name ')' +/// [GNU] '__alignof' unary-expression +/// [GNU] '__alignof' '(' type-name ')' +Parser::ExprResult Parser::ParseSizeofAlignofExpression() { + assert((Tok.is(tok::kw_sizeof) || Tok.is(tok::kw___alignof)) && + "Not a sizeof/alignof expression!"); + Token OpTok = Tok; + ConsumeToken(); + + // If the operand doesn't start with an '(', it must be an expression. 
+ ExprResult Operand; + if (Tok.isNot(tok::l_paren)) { + Operand = ParseCastExpression(true); + } else { + // If it starts with a '(', we know that it is either a parenthesized + // type-name, or it is a unary-expression that starts with a compound + // literal, or starts with a primary-expression that is a parenthesized + // expression. + ParenParseOption ExprType = CastExpr; + TypeTy *CastTy; + SourceLocation LParenLoc = Tok.getLocation(), RParenLoc; + Operand = ParseParenExpression(ExprType, CastTy, RParenLoc); + + // If ParseParenExpression parsed a '(typename)' sequence only, the this is + // sizeof/alignof a type. Otherwise, it is sizeof/alignof an expression. + if (ExprType == CastExpr) + return Actions.ActOnSizeOfAlignOfTypeExpr(OpTok.getLocation(), + OpTok.is(tok::kw_sizeof), + LParenLoc, CastTy, RParenLoc); + + // If this is a parenthesized expression, it is the start of a + // unary-expression, but doesn't include any postfix pieces. Parse these + // now if present. + Operand = ParsePostfixExpressionSuffix(Operand); + } + + // If we get here, the operand to the sizeof/alignof was an expresion. + if (!Operand.isInvalid) + Operand = Actions.ActOnUnaryOp(OpTok.getLocation(), OpTok.getKind(), + Operand.Val); + return Operand; +} + +/// ParseBuiltinPrimaryExpression +/// +/// primary-expression: [C99 6.5.1] +/// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' +/// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' +/// [GNU] '__builtin_choose_expr' '(' assign-expr ',' assign-expr ',' +/// assign-expr ')' +/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' +/// [CLANG] '__builtin_overload' '(' expr (',' expr)* ')' +/// +/// [GNU] offsetof-member-designator: +/// [GNU] identifier +/// [GNU] offsetof-member-designator '.' 
identifier +/// [GNU] offsetof-member-designator '[' expression ']' +/// +Parser::ExprResult Parser::ParseBuiltinPrimaryExpression() { + ExprResult Res(false); + const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); + + tok::TokenKind T = Tok.getKind(); + SourceLocation StartLoc = ConsumeToken(); // Eat the builtin identifier. + + // All of these start with an open paren. + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, BuiltinII->getName()); + return ExprResult(true); + } + + SourceLocation LParenLoc = ConsumeParen(); + // TODO: Build AST. + + switch (T) { + default: assert(0 && "Not a builtin primary expression!"); + case tok::kw___builtin_va_arg: { + ExprResult Expr = ParseAssignmentExpression(); + if (Expr.isInvalid) { + SkipUntil(tok::r_paren); + return Res; + } + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + TypeTy *Ty = ParseTypeName(); + + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + return ExprResult(true); + } + Res = Actions.ActOnVAArg(StartLoc, Expr.Val, Ty, ConsumeParen()); + break; + } + case tok::kw___builtin_offsetof: { + SourceLocation TypeLoc = Tok.getLocation(); + TypeTy *Ty = ParseTypeName(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + // We must have at least one identifier here. + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::r_paren); + return true; + } + + // Keep track of the various subcomponents we see. + llvm::SmallVector<Action::OffsetOfComponent, 4> Comps; + + Comps.push_back(Action::OffsetOfComponent()); + Comps.back().isBrackets = false; + Comps.back().U.IdentInfo = Tok.getIdentifierInfo(); + Comps.back().LocStart = Comps.back().LocEnd = ConsumeToken(); + + while (1) { + if (Tok.is(tok::period)) { + // offsetof-member-designator: offsetof-member-designator '.' 
identifier + Comps.push_back(Action::OffsetOfComponent()); + Comps.back().isBrackets = false; + Comps.back().LocStart = ConsumeToken(); + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::r_paren); + return true; + } + Comps.back().U.IdentInfo = Tok.getIdentifierInfo(); + Comps.back().LocEnd = ConsumeToken(); + + } else if (Tok.is(tok::l_square)) { + // offsetof-member-designator: offsetof-member-design '[' expression ']' + Comps.push_back(Action::OffsetOfComponent()); + Comps.back().isBrackets = true; + Comps.back().LocStart = ConsumeBracket(); + Res = ParseExpression(); + if (Res.isInvalid) { + SkipUntil(tok::r_paren); + return Res; + } + Comps.back().U.E = Res.Val; + + Comps.back().LocEnd = + MatchRHSPunctuation(tok::r_square, Comps.back().LocStart); + } else if (Tok.is(tok::r_paren)) { + Res = Actions.ActOnBuiltinOffsetOf(StartLoc, TypeLoc, Ty, &Comps[0], + Comps.size(), ConsumeParen()); + break; + } else { + // Error occurred. + return ExprResult(true); + } + } + break; + } + case tok::kw___builtin_choose_expr: { + ExprResult Cond = ParseAssignmentExpression(); + if (Cond.isInvalid) { + SkipUntil(tok::r_paren); + return Cond; + } + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + ExprResult Expr1 = ParseAssignmentExpression(); + if (Expr1.isInvalid) { + SkipUntil(tok::r_paren); + return Expr1; + } + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + ExprResult Expr2 = ParseAssignmentExpression(); + if (Expr2.isInvalid) { + SkipUntil(tok::r_paren); + return Expr2; + } + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + return ExprResult(true); + } + Res = Actions.ActOnChooseExpr(StartLoc, Cond.Val, Expr1.Val, Expr2.Val, + ConsumeParen()); + break; + } + case tok::kw___builtin_overload: { + llvm::SmallVector<ExprTy*, 8> ArgExprs; + llvm::SmallVector<SourceLocation, 8> CommaLocs; + + 
// For each iteration through the loop look for assign-expr followed by a + // comma. If there is no comma, break and attempt to match r-paren. + if (Tok.isNot(tok::r_paren)) { + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + SkipUntil(tok::r_paren); + return ExprResult(true); + } else + ArgExprs.push_back(ArgExpr.Val); + + if (Tok.isNot(tok::comma)) + break; + // Move to the next argument, remember where the comma was. + CommaLocs.push_back(ConsumeToken()); + } + } + + // Attempt to consume the r-paren + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + SkipUntil(tok::r_paren); + return ExprResult(true); + } + Res = Actions.ActOnOverloadExpr(&ArgExprs[0], ArgExprs.size(), + &CommaLocs[0], StartLoc, ConsumeParen()); + break; + } + case tok::kw___builtin_types_compatible_p: + TypeTy *Ty1 = ParseTypeName(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + TypeTy *Ty2 = ParseTypeName(); + + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + return ExprResult(true); + } + Res = Actions.ActOnTypesCompatibleExpr(StartLoc, Ty1, Ty2, ConsumeParen()); + break; + } + + // These can be followed by postfix-expr pieces because they are + // primary-expressions. + return ParsePostfixExpressionSuffix(Res); +} + +/// ParseParenExpression - This parses the unit that starts with a '(' token, +/// based on what is allowed by ExprType. The actual thing parsed is returned +/// in ExprType. 
+/// +/// primary-expression: [C99 6.5.1] +/// '(' expression ')' +/// [GNU] '(' compound-statement ')' (if !ParenExprOnly) +/// postfix-expression: [C99 6.5.2] +/// '(' type-name ')' '{' initializer-list '}' +/// '(' type-name ')' '{' initializer-list ',' '}' +/// cast-expression: [C99 6.5.4] +/// '(' type-name ')' cast-expression +/// +Parser::ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType, + TypeTy *&CastTy, + SourceLocation &RParenLoc) { + assert(Tok.is(tok::l_paren) && "Not a paren expr!"); + SourceLocation OpenLoc = ConsumeParen(); + ExprResult Result(true); + CastTy = 0; + + if (ExprType >= CompoundStmt && Tok.is(tok::l_brace)) { + Diag(Tok, diag::ext_gnu_statement_expr); + Parser::StmtResult Stmt = ParseCompoundStatement(true); + ExprType = CompoundStmt; + + // If the substmt parsed correctly, build the AST node. + if (!Stmt.isInvalid && Tok.is(tok::r_paren)) + Result = Actions.ActOnStmtExpr(OpenLoc, Stmt.Val, Tok.getLocation()); + + } else if (ExprType >= CompoundLiteral && isTypeSpecifierQualifier()) { + // Otherwise, this is a compound literal expression or cast expression. + TypeTy *Ty = ParseTypeName(); + + // Match the ')'. + if (Tok.is(tok::r_paren)) + RParenLoc = ConsumeParen(); + else + MatchRHSPunctuation(tok::r_paren, OpenLoc); + + if (Tok.is(tok::l_brace)) { + if (!getLang().C99) // Compound literals don't exist in C90. + Diag(OpenLoc, diag::ext_c99_compound_literal); + Result = ParseInitializer(); + ExprType = CompoundLiteral; + if (!Result.isInvalid) + return Actions.ActOnCompoundLiteral(OpenLoc, Ty, RParenLoc, Result.Val); + } else if (ExprType == CastExpr) { + // Note that this doesn't parse the subsequence cast-expression, it just + // returns the parsed type to the callee. 
+ ExprType = CastExpr; + CastTy = Ty; + return ExprResult(false); + } else { + Diag(Tok, diag::err_expected_lbrace_in_compound_literal); + return ExprResult(true); + } + return Result; + } else { + Result = ParseExpression(); + ExprType = SimpleExpr; + if (!Result.isInvalid && Tok.is(tok::r_paren)) + Result = Actions.ActOnParenExpr(OpenLoc, Tok.getLocation(), Result.Val); + } + + // Match the ')'. + if (Result.isInvalid) + SkipUntil(tok::r_paren); + else { + if (Tok.is(tok::r_paren)) + RParenLoc = ConsumeParen(); + else + MatchRHSPunctuation(tok::r_paren, OpenLoc); + } + + return Result; +} + +/// ParseStringLiteralExpression - This handles the various token types that +/// form string literals, and also handles string concatenation [C99 5.1.1.2, +/// translation phase #6]. +/// +/// primary-expression: [C99 6.5.1] +/// string-literal +Parser::ExprResult Parser::ParseStringLiteralExpression() { + assert(isTokenStringLiteral() && "Not a string literal!"); + + // String concat. Note that keywords like __func__ and __FUNCTION__ are not + // considered to be strings for concatenation purposes. + llvm::SmallVector<Token, 4> StringToks; + + do { + StringToks.push_back(Tok); + ConsumeStringToken(); + } while (isTokenStringLiteral()); + + // Pass the set of string tokens, ready for concatenation, to the actions. + return Actions.ActOnStringLiteral(&StringToks[0], StringToks.size()); +} diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp new file mode 100644 index 00000000000..6b42fb5b089 --- /dev/null +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -0,0 +1,99 @@ +//===--- ParseExprCXX.cpp - C++ Expression Parsing ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the Expression parsing implementation for C++. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Parse/Parser.h" +using namespace clang; + +/// ParseCXXCasts - This handles the various ways to cast expressions to another +/// type. +/// +/// postfix-expression: [C++ 5.2p1] +/// 'dynamic_cast' '<' type-name '>' '(' expression ')' +/// 'static_cast' '<' type-name '>' '(' expression ')' +/// 'reinterpret_cast' '<' type-name '>' '(' expression ')' +/// 'const_cast' '<' type-name '>' '(' expression ')' +/// +Parser::ExprResult Parser::ParseCXXCasts() { + tok::TokenKind Kind = Tok.getKind(); + const char *CastName = 0; // For error messages + + switch (Kind) { + default: assert(0 && "Unknown C++ cast!"); abort(); + case tok::kw_const_cast: CastName = "const_cast"; break; + case tok::kw_dynamic_cast: CastName = "dynamic_cast"; break; + case tok::kw_reinterpret_cast: CastName = "reinterpret_cast"; break; + case tok::kw_static_cast: CastName = "static_cast"; break; + } + + SourceLocation OpLoc = ConsumeToken(); + SourceLocation LAngleBracketLoc = Tok.getLocation(); + + if (ExpectAndConsume(tok::less, diag::err_expected_less_after, CastName)) + return ExprResult(true); + + TypeTy *CastTy = ParseTypeName(); + SourceLocation RAngleBracketLoc = Tok.getLocation(); + + if (ExpectAndConsume(tok::greater, diag::err_expected_greater)) { + Diag(LAngleBracketLoc, diag::err_matching, "<"); + return ExprResult(true); + } + + SourceLocation LParenLoc = Tok.getLocation(), RParenLoc; + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, CastName); + return ExprResult(true); + } + + ExprResult Result = ParseSimpleParenExpression(RParenLoc); + + if (!Result.isInvalid) + Result = Actions.ActOnCXXCasts(OpLoc, Kind, + LAngleBracketLoc, CastTy, RAngleBracketLoc, + 
LParenLoc, Result.Val, RParenLoc); + + return Result; +} + +/// ParseCXXBoolLiteral - This handles the C++ Boolean literals. +/// +/// boolean-literal: [C++ 2.13.5] +/// 'true' +/// 'false' +Parser::ExprResult Parser::ParseCXXBoolLiteral() { + tok::TokenKind Kind = Tok.getKind(); + return Actions.ActOnCXXBoolLiteral(ConsumeToken(), Kind); +} + +/// ParseThrowExpression - This handles the C++ throw expression. +/// +/// throw-expression: [C++ 15] +/// 'throw' assignment-expression[opt] +Parser::ExprResult Parser::ParseThrowExpression() { + assert(Tok.is(tok::kw_throw) && "Not throw!"); + + ExprResult Expr; + + SourceLocation ThrowLoc = ConsumeToken(); // Eat the throw token. + // FIXME: Anything that isn't an assignment-expression should bail out now. + if (Tok.is(tok::semi) || Tok.is(tok::r_paren) || Tok.is(tok::colon) || + Tok.is(tok::comma)) + return Actions.ActOnCXXThrow(ThrowLoc); + + Expr = ParseAssignmentExpression(); + if (!Expr.isInvalid) + Expr = Actions.ActOnCXXThrow(ThrowLoc, Expr.Val); + return Expr; +} diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp new file mode 100644 index 00000000000..45cf86e5b44 --- /dev/null +++ b/clang/lib/Parse/ParseInit.cpp @@ -0,0 +1,227 @@ +//===--- ParseInit.cpp - Initializer Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements initializer parsing as specified by C99 6.7.8. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + + +/// MayBeDesignationStart - Return true if this token might be the start of a +/// designator. 
+static bool MayBeDesignationStart(tok::TokenKind K) { + switch (K) { + default: return false; + case tok::period: // designator: '.' identifier + case tok::l_square: // designator: array-designator + case tok::identifier: // designation: identifier ':' + return true; + } +} + +/// ParseInitializerWithPotentialDesignator - Parse the 'initializer' production +/// checking to see if the token stream starts with a designator. +/// +/// designation: +/// designator-list '=' +/// [GNU] array-designator +/// [GNU] identifier ':' +/// +/// designator-list: +/// designator +/// designator-list designator +/// +/// designator: +/// array-designator +/// '.' identifier +/// +/// array-designator: +/// '[' constant-expression ']' +/// [GNU] '[' constant-expression '...' constant-expression ']' +/// +/// NOTE: [OBC] allows '[ objc-receiver objc-message-args ]' as an +/// initializer. We need to consider this case when parsing array designators. +/// +Parser::ExprResult Parser::ParseInitializerWithPotentialDesignator() { + // Parse each designator in the designator list until we find an initializer. + while (1) { + switch (Tok.getKind()) { + case tok::equal: + // We read some number (at least one due to the grammar we implemented) + // of designators and found an '=' sign. The following tokens must be + // the initializer. + ConsumeToken(); + return ParseInitializer(); + + default: { + // We read some number (at least one due to the grammar we implemented) + // of designators and found something that isn't an = or an initializer. + // If we have exactly one array designator [TODO CHECK], this is the GNU + // 'designation: array-designator' extension. Otherwise, it is a parse + // error. + SourceLocation Loc = Tok.getLocation(); + ExprResult Init = ParseInitializer(); + if (Init.isInvalid) return Init; + + Diag(Tok, diag::ext_gnu_missing_equal_designator); + return Init; + } + case tok::period: + // designator: '.' 
identifier + ConsumeToken(); + if (ExpectAndConsume(tok::identifier, diag::err_expected_ident)) + return ExprResult(true); + break; + + case tok::l_square: { + // array-designator: '[' constant-expression ']' + // array-designator: '[' constant-expression '...' constant-expression ']' + // When designation is empty, this can be '[' objc-message-expr ']'. Note + // that we also have the case of [4][foo bar], which is the gnu designator + // extension + objc message send. + SourceLocation StartLoc = ConsumeBracket(); + + // If Objective-C is enabled and this is a typename or other identifier + // receiver, parse this as a message send expression. + if (getLang().ObjC1 && isTokObjCMessageIdentifierReceiver()) { + // FIXME: Emit ext_gnu_missing_equal_designator for inits like + // [4][foo bar]. + IdentifierInfo *Name = Tok.getIdentifierInfo(); + ConsumeToken(); + ExprResult R = ParseObjCMessageExpressionBody(StartLoc, Name, 0); + return ParsePostfixExpressionSuffix(R); + } + + // Note that we parse this as an assignment expression, not a constant + // expression (allowing *=, =, etc) to handle the objc case. Sema needs + // to validate that the expression is a constant. + ExprResult Idx = ParseAssignmentExpression(); + if (Idx.isInvalid) { + SkipUntil(tok::r_square); + return Idx; + } + + // Given an expression, we could either have a designator (if the next + // tokens are '...' or ']' or an objc message send. If this is an objc + // message send, handle it now. + if (getLang().ObjC1 && Tok.isNot(tok::ellipsis) && + Tok.isNot(tok::r_square)) { + // FIXME: Emit ext_gnu_missing_equal_designator for inits like + // [4][foo bar]. + ExprResult R = ParseObjCMessageExpressionBody(StartLoc, 0, Idx.Val); + return ParsePostfixExpressionSuffix(R); + } + + // Handle the gnu array range extension. 
+ if (Tok.is(tok::ellipsis)) { + Diag(Tok, diag::ext_gnu_array_range); + ConsumeToken(); + + ExprResult RHS = ParseConstantExpression(); + if (RHS.isInvalid) { + SkipUntil(tok::r_square); + return RHS; + } + } + + MatchRHSPunctuation(tok::r_square, StartLoc); + break; + } + case tok::identifier: { + // Due to the GNU "designation: identifier ':'" extension, we don't know + // whether something starting with an identifier is an + // assignment-expression or if it is an old-style structure field + // designator. + // TODO: Check that this is the first designator. + Token Ident = Tok; + ConsumeToken(); + + // If this is the gross GNU extension, handle it now. + if (Tok.is(tok::colon)) { + Diag(Ident, diag::ext_gnu_old_style_field_designator); + ConsumeToken(); + return ParseInitializer(); + } + + // Otherwise, we just consumed the first token of an expression. Parse + // the rest of it now. + return ParseAssignmentExprWithLeadingIdentifier(Ident); + } + } + } +} + + +/// ParseInitializer +/// initializer: [C99 6.7.8] +/// assignment-expression +/// '{' initializer-list '}' +/// '{' initializer-list ',' '}' +/// [GNU] '{' '}' +/// +/// initializer-list: +/// designation[opt] initializer +/// initializer-list ',' designation[opt] initializer +/// +Parser::ExprResult Parser::ParseInitializer() { + if (Tok.isNot(tok::l_brace)) + return ParseAssignmentExpression(); + + SourceLocation LBraceLoc = ConsumeBrace(); + + // We support empty initializers, but tell the user that they aren't using + // C99-clean code. + if (Tok.is(tok::r_brace)) { + Diag(LBraceLoc, diag::ext_gnu_empty_initializer); + // Match the '}'. + return Actions.ActOnInitList(LBraceLoc, 0, 0, ConsumeBrace()); + } + llvm::SmallVector<ExprTy*, 8> InitExprs; + bool InitExprsOk = true; + + while (1) { + // Parse: designation[opt] initializer + + // If we know that this cannot be a designation, just parse the nested + // initializer directly. 
+ ExprResult SubElt; + if (!MayBeDesignationStart(Tok.getKind())) + SubElt = ParseInitializer(); + else + SubElt = ParseInitializerWithPotentialDesignator(); + + // If we couldn't parse the subelement, bail out. + if (SubElt.isInvalid) { + InitExprsOk = false; + SkipUntil(tok::r_brace, false, true); + break; + } else + InitExprs.push_back(SubElt.Val); + + // If we don't have a comma continued list, we're done. + if (Tok.isNot(tok::comma)) break; + + // FIXME: save comma locations. + ConsumeToken(); + + // Handle trailing comma. + if (Tok.is(tok::r_brace)) break; + } + if (InitExprsOk && Tok.is(tok::r_brace)) + return Actions.ActOnInitList(LBraceLoc, &InitExprs[0], InitExprs.size(), + ConsumeBrace()); + // Match the '}'. + MatchRHSPunctuation(tok::r_brace, LBraceLoc); + return ExprResult(true); // an error occurred. +} + diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp new file mode 100644 index 00000000000..77d2adbd320 --- /dev/null +++ b/clang/lib/Parse/ParseObjc.cpp @@ -0,0 +1,1578 @@ +//===--- ParseObjC.cpp - Objective C Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Objective-C portions of the Parser interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + + +/// ParseExternalDeclaration: +/// external-declaration: [C99 6.9] +/// [OBJC] objc-class-definition +/// [OBJC] objc-class-declaration +/// [OBJC] objc-alias-declaration +/// [OBJC] objc-protocol-definition +/// [OBJC] objc-method-definition +/// [OBJC] '@' 'end' +Parser::DeclTy *Parser::ParseObjCAtDirectives() { + SourceLocation AtLoc = ConsumeToken(); // the "@" + + switch (Tok.getObjCKeywordID()) { + case tok::objc_class: + return ParseObjCAtClassDeclaration(AtLoc); + case tok::objc_interface: + return ParseObjCAtInterfaceDeclaration(AtLoc); + case tok::objc_protocol: + return ParseObjCAtProtocolDeclaration(AtLoc); + case tok::objc_implementation: + return ParseObjCAtImplementationDeclaration(AtLoc); + case tok::objc_end: + return ParseObjCAtEndDeclaration(AtLoc); + case tok::objc_compatibility_alias: + return ParseObjCAtAliasDeclaration(AtLoc); + case tok::objc_synthesize: + return ParseObjCPropertySynthesize(AtLoc); + case tok::objc_dynamic: + return ParseObjCPropertyDynamic(AtLoc); + default: + Diag(AtLoc, diag::err_unexpected_at); + SkipUntil(tok::semi); + return 0; + } +} + +/// +/// objc-class-declaration: +/// '@' 'class' identifier-list ';' +/// +Parser::DeclTy *Parser::ParseObjCAtClassDeclaration(SourceLocation atLoc) { + ConsumeToken(); // the identifier "class" + llvm::SmallVector<IdentifierInfo *, 8> ClassNames; + + while (1) { + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::semi); + return 0; + } + ClassNames.push_back(Tok.getIdentifierInfo()); + ConsumeToken(); + + if (Tok.isNot(tok::comma)) + break; + + ConsumeToken(); + } + + // Consume the ';'. 
+ if (ExpectAndConsume(tok::semi, diag::err_expected_semi_after, "@class")) + return 0; + + return Actions.ActOnForwardClassDeclaration(atLoc, + &ClassNames[0], ClassNames.size()); +} + +/// +/// objc-interface: +/// objc-class-interface-attributes[opt] objc-class-interface +/// objc-category-interface +/// +/// objc-class-interface: +/// '@' 'interface' identifier objc-superclass[opt] +/// objc-protocol-refs[opt] +/// objc-class-instance-variables[opt] +/// objc-interface-decl-list +/// @end +/// +/// objc-category-interface: +/// '@' 'interface' identifier '(' identifier[opt] ')' +/// objc-protocol-refs[opt] +/// objc-interface-decl-list +/// @end +/// +/// objc-superclass: +/// ':' identifier +/// +/// objc-class-interface-attributes: +/// __attribute__((visibility("default"))) +/// __attribute__((visibility("hidden"))) +/// __attribute__((deprecated)) +/// __attribute__((unavailable)) +/// __attribute__((objc_exception)) - used by NSException on 64-bit +/// +Parser::DeclTy *Parser::ParseObjCAtInterfaceDeclaration( + SourceLocation atLoc, AttributeList *attrList) { + assert(Tok.isObjCAtKeyword(tok::objc_interface) && + "ParseObjCAtInterfaceDeclaration(): Expected @interface"); + ConsumeToken(); // the "interface" identifier + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing class or category name. + return 0; + } + // We have a class or category name - consume it. + IdentifierInfo *nameId = Tok.getIdentifierInfo(); + SourceLocation nameLoc = ConsumeToken(); + + if (Tok.is(tok::l_paren)) { // we have a category. + SourceLocation lparenLoc = ConsumeParen(); + SourceLocation categoryLoc, rparenLoc; + IdentifierInfo *categoryId = 0; + llvm::SmallVector<IdentifierInfo *, 8> ProtocolRefs; + + // For ObjC2, the category name is optional (not an error). 
+ if (Tok.is(tok::identifier)) { + categoryId = Tok.getIdentifierInfo(); + categoryLoc = ConsumeToken(); + } else if (!getLang().ObjC2) { + Diag(Tok, diag::err_expected_ident); // missing category name. + return 0; + } + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + SkipUntil(tok::r_paren, false); // don't stop at ';' + return 0; + } + rparenLoc = ConsumeParen(); + SourceLocation endProtoLoc; + // Next, we need to check for any protocol references. + if (Tok.is(tok::less)) { + if (ParseObjCProtocolReferences(ProtocolRefs, endProtoLoc)) + return 0; + } + if (attrList) // categories don't support attributes. + Diag(Tok, diag::err_objc_no_attributes_on_category); + + DeclTy *CategoryType = Actions.ActOnStartCategoryInterface(atLoc, + nameId, nameLoc, categoryId, categoryLoc, + &ProtocolRefs[0], ProtocolRefs.size(), + endProtoLoc); + + ParseObjCInterfaceDeclList(CategoryType, tok::objc_not_keyword); + + // The @ sign was already consumed by ParseObjCInterfaceDeclList(). + if (Tok.isObjCAtKeyword(tok::objc_end)) { + ConsumeToken(); // the "end" identifier + return CategoryType; + } + Diag(Tok, diag::err_objc_missing_end); + return 0; + } + // Parse a class interface. + IdentifierInfo *superClassId = 0; + SourceLocation superClassLoc; + + if (Tok.is(tok::colon)) { // a super class is specified. + ConsumeToken(); + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing super class name. + return 0; + } + superClassId = Tok.getIdentifierInfo(); + superClassLoc = ConsumeToken(); + } + // Next, we need to check for any protocol references. 
+ llvm::SmallVector<IdentifierInfo *, 8> ProtocolRefs; + SourceLocation endProtoLoc; + if (Tok.is(tok::less)) { + if (ParseObjCProtocolReferences(ProtocolRefs, endProtoLoc)) + return 0; + } + DeclTy *ClsType = Actions.ActOnStartClassInterface( + atLoc, nameId, nameLoc, + superClassId, superClassLoc, &ProtocolRefs[0], + ProtocolRefs.size(), endProtoLoc, attrList); + + if (Tok.is(tok::l_brace)) + ParseObjCClassInstanceVariables(ClsType, atLoc); + + ParseObjCInterfaceDeclList(ClsType, tok::objc_interface); + + // The @ sign was already consumed by ParseObjCInterfaceDeclList(). + if (Tok.isObjCAtKeyword(tok::objc_end)) { + ConsumeToken(); // the "end" identifier + return ClsType; + } + Diag(Tok, diag::err_objc_missing_end); + return 0; +} + +/// objc-interface-decl-list: +/// empty +/// objc-interface-decl-list objc-property-decl [OBJC2] +/// objc-interface-decl-list objc-method-requirement [OBJC2] +/// objc-interface-decl-list objc-method-proto ';' +/// objc-interface-decl-list declaration +/// objc-interface-decl-list ';' +/// +/// objc-method-requirement: [OBJC2] +/// @required +/// @optional +/// +void Parser::ParseObjCInterfaceDeclList(DeclTy *interfaceDecl, + tok::ObjCKeywordKind contextKey) { + llvm::SmallVector<DeclTy*, 32> allMethods; + llvm::SmallVector<DeclTy*, 16> allProperties; + tok::ObjCKeywordKind MethodImplKind = tok::objc_not_keyword; + SourceLocation AtEndLoc; + + while (1) { + if (Tok.is(tok::at)) { + SourceLocation AtLoc = ConsumeToken(); // the "@" + tok::ObjCKeywordKind ocKind = Tok.getObjCKeywordID(); + + if (ocKind == tok::objc_end) { // terminate list + AtEndLoc = AtLoc; + break; + } else if (ocKind == tok::objc_required) { // protocols only + ConsumeToken(); + MethodImplKind = ocKind; + if (contextKey != tok::objc_protocol) + Diag(AtLoc, diag::err_objc_protocol_required); + } else if (ocKind == tok::objc_optional) { // protocols only + ConsumeToken(); + MethodImplKind = ocKind; + if (contextKey != tok::objc_protocol) + Diag(AtLoc, 
diag::err_objc_protocol_optional); + } else if (ocKind == tok::objc_property) { + allProperties.push_back(ParseObjCPropertyDecl(interfaceDecl, AtLoc)); + continue; + } else { + Diag(Tok, diag::err_objc_illegal_interface_qual); + ConsumeToken(); + } + } + if (Tok.is(tok::minus) || Tok.is(tok::plus)) { + DeclTy *methodPrototype = + ParseObjCMethodPrototype(interfaceDecl, MethodImplKind); + allMethods.push_back(methodPrototype); + // Consume the ';' here, since ParseObjCMethodPrototype() is re-used for + // method definitions. + ExpectAndConsume(tok::semi, diag::err_expected_semi_after,"method proto"); + continue; + } + else if (Tok.is(tok::at)) + continue; + + if (Tok.is(tok::semi)) + ConsumeToken(); + else if (Tok.is(tok::eof)) + break; + else { + // FIXME: as the name implies, this rule allows function definitions. + // We could pass a flag or check for functions during semantic analysis. + ParseDeclarationOrFunctionDefinition(); + } + } + /// Insert collected methods declarations into the @interface object. + Actions.ActOnAtEnd(AtEndLoc, interfaceDecl, &allMethods[0], allMethods.size(), + &allProperties[0], allProperties.size()); +} + +/// Parse property attribute declarations. +/// +/// property-attr-decl: '(' property-attrlist ')' +/// property-attrlist: +/// property-attribute +/// property-attrlist ',' property-attribute +/// property-attribute: +/// getter '=' identifier +/// setter '=' identifier ':' +/// readonly +/// readwrite +/// assign +/// retain +/// copy +/// nonatomic +/// +void Parser::ParseObjCPropertyAttribute (ObjCDeclSpec &DS) { + SourceLocation loc = ConsumeParen(); // consume '(' + while (isObjCPropertyAttribute()) { + const IdentifierInfo *II = Tok.getIdentifierInfo(); + // getter/setter require extra treatment. + if (II == ObjCPropertyAttrs[objc_getter] || + II == ObjCPropertyAttrs[objc_setter]) { + // skip getter/setter part. 
+ SourceLocation loc = ConsumeToken(); + if (Tok.is(tok::equal)) { + loc = ConsumeToken(); + if (Tok.is(tok::identifier)) { + if (II == ObjCPropertyAttrs[objc_setter]) { + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_setter); + DS.setSetterName(Tok.getIdentifierInfo()); + loc = ConsumeToken(); // consume method name + if (Tok.isNot(tok::colon)) { + Diag(loc, diag::err_expected_colon); + SkipUntil(tok::r_paren,true,true); + break; + } + } + else { + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_getter); + DS.setGetterName(Tok.getIdentifierInfo()); + } + } + else { + Diag(loc, diag::err_expected_ident); + SkipUntil(tok::r_paren,true,true); + break; + } + } + else { + Diag(loc, diag::err_objc_expected_equal); + SkipUntil(tok::r_paren,true,true); + break; + } + } + + else if (II == ObjCPropertyAttrs[objc_readonly]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_readonly); + else if (II == ObjCPropertyAttrs[objc_assign]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_assign); + else if (II == ObjCPropertyAttrs[objc_readwrite]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_readwrite); + else if (II == ObjCPropertyAttrs[objc_retain]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_retain); + else if (II == ObjCPropertyAttrs[objc_copy]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_copy); + else if (II == ObjCPropertyAttrs[objc_nonatomic]) + DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_nonatomic); + + ConsumeToken(); // consume last attribute token + if (Tok.is(tok::comma)) { + loc = ConsumeToken(); + continue; + } + if (Tok.is(tok::r_paren)) + break; + Diag(loc, diag::err_expected_rparen); + SkipUntil(tok::semi); + return; + } + if (Tok.is(tok::r_paren)) + ConsumeParen(); + else { + Diag(loc, diag::err_objc_expected_property_attr); + SkipUntil(tok::r_paren); // recover from error inside attribute list + } +} + +/// Main routine to parse property declaration. 
+/// +/// @property property-attr-decl[opt] property-component-decl ';' +/// +Parser::DeclTy *Parser::ParseObjCPropertyDecl(DeclTy *interfaceDecl, + SourceLocation AtLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_property) && + "ParseObjCPropertyDecl(): Expected @property"); + ObjCDeclSpec DS; + ConsumeToken(); // the "property" identifier + // Parse property attribute list, if any. + if (Tok.is(tok::l_paren)) { + // property has attribute list. + ParseObjCPropertyAttribute(DS); + } + // Parse declaration portion of @property. + llvm::SmallVector<DeclTy*, 8> PropertyDecls; + ParseStructDeclaration(interfaceDecl, PropertyDecls); + if (Tok.is(tok::semi)) + ConsumeToken(); + else { + Diag(Tok, diag::err_expected_semi_decl_list); + SkipUntil(tok::r_brace, true, true); + } + return Actions.ActOnAddObjCProperties(AtLoc, + &PropertyDecls[0], PropertyDecls.size(), DS); +} + +/// objc-method-proto: +/// objc-instance-method objc-method-decl objc-method-attributes[opt] +/// objc-class-method objc-method-decl objc-method-attributes[opt] +/// +/// objc-instance-method: '-' +/// objc-class-method: '+' +/// +/// objc-method-attributes: [OBJC2] +/// __attribute__((deprecated)) +/// +Parser::DeclTy *Parser::ParseObjCMethodPrototype(DeclTy *IDecl, + tok::ObjCKeywordKind MethodImplKind) { + assert((Tok.is(tok::minus) || Tok.is(tok::plus)) && "expected +/-"); + + tok::TokenKind methodType = Tok.getKind(); + SourceLocation mLoc = ConsumeToken(); + + DeclTy *MDecl = ParseObjCMethodDecl(mLoc, methodType, IDecl, MethodImplKind); + // Since this rule is used for both method declarations and definitions, + // the caller is (optionally) responsible for consuming the ';'. 
+ return MDecl; +} + +/// objc-selector: +/// identifier +/// one of +/// enum struct union if else while do for switch case default +/// break continue return goto asm sizeof typeof __alignof +/// unsigned long const short volatile signed restrict _Complex +/// in out inout bycopy byref oneway int char float double void _Bool +/// +IdentifierInfo *Parser::ParseObjCSelector(SourceLocation &SelectorLoc) { + switch (Tok.getKind()) { + default: + return 0; + case tok::identifier: + case tok::kw_typeof: + case tok::kw___alignof: + case tok::kw_auto: + case tok::kw_break: + case tok::kw_case: + case tok::kw_char: + case tok::kw_const: + case tok::kw_continue: + case tok::kw_default: + case tok::kw_do: + case tok::kw_double: + case tok::kw_else: + case tok::kw_enum: + case tok::kw_extern: + case tok::kw_float: + case tok::kw_for: + case tok::kw_goto: + case tok::kw_if: + case tok::kw_inline: + case tok::kw_int: + case tok::kw_long: + case tok::kw_register: + case tok::kw_restrict: + case tok::kw_return: + case tok::kw_short: + case tok::kw_signed: + case tok::kw_sizeof: + case tok::kw_static: + case tok::kw_struct: + case tok::kw_switch: + case tok::kw_typedef: + case tok::kw_union: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_volatile: + case tok::kw_while: + case tok::kw_bool: + case tok::kw__Bool: + case tok::kw__Complex: + IdentifierInfo *II = Tok.getIdentifierInfo(); + SelectorLoc = ConsumeToken(); + return II; + } +} + +/// property-attrlist: one of +/// readonly getter setter assign retain copy nonatomic +/// +bool Parser::isObjCPropertyAttribute() { + if (Tok.is(tok::identifier)) { + const IdentifierInfo *II = Tok.getIdentifierInfo(); + for (unsigned i = 0; i < objc_NumAttrs; ++i) + if (II == ObjCPropertyAttrs[i]) return true; + } + return false; +} + +/// objc-for-collection-in: 'in' +/// +bool Parser::isTokIdentifier_in() const { + // FIXME: May have to do additional look-ahead to only allow for + // valid tokens following an 'in'; such as an 
identifier, unary operators, + // '[' etc. + return (getLang().ObjC2 && Tok.is(tok::identifier) && + Tok.getIdentifierInfo() == ObjCForCollectionInKW); +} + +/// ParseObjCTypeQualifierList - This routine parses the objective-c's type +/// qualifier list and builds their bitmask representation in the input +/// argument. +/// +/// objc-type-qualifiers: +/// objc-type-qualifier +/// objc-type-qualifiers objc-type-qualifier +/// +void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS) { + while (1) { + if (Tok.isNot(tok::identifier)) + return; + + const IdentifierInfo *II = Tok.getIdentifierInfo(); + for (unsigned i = 0; i != objc_NumQuals; ++i) { + if (II != ObjCTypeQuals[i]) + continue; + + ObjCDeclSpec::ObjCDeclQualifier Qual; + switch (i) { + default: assert(0 && "Unknown decl qualifier"); + case objc_in: Qual = ObjCDeclSpec::DQ_In; break; + case objc_out: Qual = ObjCDeclSpec::DQ_Out; break; + case objc_inout: Qual = ObjCDeclSpec::DQ_Inout; break; + case objc_oneway: Qual = ObjCDeclSpec::DQ_Oneway; break; + case objc_bycopy: Qual = ObjCDeclSpec::DQ_Bycopy; break; + case objc_byref: Qual = ObjCDeclSpec::DQ_Byref; break; + } + DS.setObjCDeclQualifier(Qual); + ConsumeToken(); + II = 0; + break; + } + + // If this wasn't a recognized qualifier, bail out. + if (II) return; + } +} + +/// objc-type-name: +/// '(' objc-type-qualifiers[opt] type-name ')' +/// '(' objc-type-qualifiers[opt] ')' +/// +Parser::TypeTy *Parser::ParseObjCTypeName(ObjCDeclSpec &DS) { + assert(Tok.is(tok::l_paren) && "expected ("); + + SourceLocation LParenLoc = ConsumeParen(), RParenLoc; + TypeTy *Ty = 0; + + // Parse type qualifiers, in, inout, etc. + ParseObjCTypeQualifierList(DS); + + if (isTypeSpecifierQualifier()) { + Ty = ParseTypeName(); + // FIXME: back when Sema support is in place... 
+ // assert(Ty && "Parser::ParseObjCTypeName(): missing type"); + } + if (Tok.isNot(tok::r_paren)) { + MatchRHSPunctuation(tok::r_paren, LParenLoc); + return 0; // FIXME: decide how we want to handle this error... + } + RParenLoc = ConsumeParen(); + return Ty; +} + +/// objc-method-decl: +/// objc-selector +/// objc-keyword-selector objc-parmlist[opt] +/// objc-type-name objc-selector +/// objc-type-name objc-keyword-selector objc-parmlist[opt] +/// +/// objc-keyword-selector: +/// objc-keyword-decl +/// objc-keyword-selector objc-keyword-decl +/// +/// objc-keyword-decl: +/// objc-selector ':' objc-type-name objc-keyword-attributes[opt] identifier +/// objc-selector ':' objc-keyword-attributes[opt] identifier +/// ':' objc-type-name objc-keyword-attributes[opt] identifier +/// ':' objc-keyword-attributes[opt] identifier +/// +/// objc-parmlist: +/// objc-parms objc-ellipsis[opt] +/// +/// objc-parms: +/// objc-parms , parameter-declaration +/// +/// objc-ellipsis: +/// , ... +/// +/// objc-keyword-attributes: [OBJC2] +/// __attribute__((unused)) +/// +Parser::DeclTy *Parser::ParseObjCMethodDecl(SourceLocation mLoc, + tok::TokenKind mType, + DeclTy *IDecl, + tok::ObjCKeywordKind MethodImplKind) +{ + // Parse the return type. + TypeTy *ReturnType = 0; + ObjCDeclSpec DSRet; + if (Tok.is(tok::l_paren)) + ReturnType = ParseObjCTypeName(DSRet); + SourceLocation selLoc; + IdentifierInfo *SelIdent = ParseObjCSelector(selLoc); + if (Tok.isNot(tok::colon)) { + if (!SelIdent) { + Diag(Tok, diag::err_expected_ident); // missing selector name. + // FIXME: this creates a unary selector with a null identifier, is this + // ok?? Maybe we should skip to the next semicolon or something. + } + + // If attributes exist after the method, parse them. 
+ AttributeList *MethodAttrs = 0; + if (getLang().ObjC2 && Tok.is(tok::kw___attribute)) + MethodAttrs = ParseAttributes(); + + Selector Sel = PP.getSelectorTable().getNullarySelector(SelIdent); + return Actions.ActOnMethodDeclaration(mLoc, Tok.getLocation(), + mType, IDecl, DSRet, ReturnType, Sel, + 0, 0, 0, MethodAttrs, MethodImplKind); + } + + llvm::SmallVector<IdentifierInfo *, 12> KeyIdents; + llvm::SmallVector<Action::TypeTy *, 12> KeyTypes; + llvm::SmallVector<ObjCDeclSpec, 12> ArgTypeQuals; + llvm::SmallVector<IdentifierInfo *, 12> ArgNames; + + Action::TypeTy *TypeInfo; + while (1) { + KeyIdents.push_back(SelIdent); + + // Each iteration parses a single keyword argument. + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon); + break; + } + ConsumeToken(); // Eat the ':'. + ObjCDeclSpec DSType; + if (Tok.is(tok::l_paren)) { // Parse the argument type. + TypeInfo = ParseObjCTypeName(DSType); + } + else + TypeInfo = 0; + KeyTypes.push_back(TypeInfo); + ArgTypeQuals.push_back(DSType); + + // If attributes exist before the argument name, parse them. + if (getLang().ObjC2 && Tok.is(tok::kw___attribute)) + ParseAttributes(); // FIXME: pass attributes through. + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing argument name. + break; + } + ArgNames.push_back(Tok.getIdentifierInfo()); + ConsumeToken(); // Eat the identifier. + + // Check for another keyword selector. + SourceLocation Loc; + SelIdent = ParseObjCSelector(Loc); + if (!SelIdent && Tok.isNot(tok::colon)) + break; + // We have a selector or a colon, continue parsing. + } + + bool isVariadic = false; + + // Parse the (optional) parameter list. + while (Tok.is(tok::comma)) { + ConsumeToken(); + if (Tok.is(tok::ellipsis)) { + isVariadic = true; + ConsumeToken(); + break; + } + // FIXME: implement this... + // Parse the c-style argument declaration-specifier. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + // Parse the declarator. 
+ Declarator ParmDecl(DS, Declarator::PrototypeContext); + ParseDeclarator(ParmDecl); + } + + // FIXME: Add support for optional parmameter list... + // If attributes exist after the method, parse them. + AttributeList *MethodAttrs = 0; + if (getLang().ObjC2 && Tok.is(tok::kw___attribute)) + MethodAttrs = ParseAttributes(); + + Selector Sel = PP.getSelectorTable().getSelector(KeyIdents.size(), + &KeyIdents[0]); + return Actions.ActOnMethodDeclaration(mLoc, Tok.getLocation(), + mType, IDecl, DSRet, ReturnType, Sel, + &ArgTypeQuals[0], &KeyTypes[0], + &ArgNames[0], MethodAttrs, + MethodImplKind, isVariadic); +} + +/// CmpProtocolVals - Comparison predicate for sorting protocols. +static bool CmpProtocolVals(const IdentifierInfo* const& lhs, + const IdentifierInfo* const& rhs) { + return strcmp(lhs->getName(), rhs->getName()) < 0; +} + +/// objc-protocol-refs: +/// '<' identifier-list '>' +/// +bool Parser::ParseObjCProtocolReferences( + llvm::SmallVectorImpl<IdentifierInfo*> &ProtocolRefs, SourceLocation &endLoc) +{ + assert(Tok.is(tok::less) && "expected <"); + + ConsumeToken(); // the "<" + + while (1) { + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::greater); + return true; + } + ProtocolRefs.push_back(Tok.getIdentifierInfo()); + ConsumeToken(); + + if (Tok.isNot(tok::comma)) + break; + ConsumeToken(); + } + + // Sort protocols, keyed by name. + // Later on, we remove duplicates. + std::stable_sort(ProtocolRefs.begin(), ProtocolRefs.end(), CmpProtocolVals); + + // Make protocol names unique. + ProtocolRefs.erase(std::unique(ProtocolRefs.begin(), ProtocolRefs.end()), + ProtocolRefs.end()); + // Consume the '>'. 
+ if (Tok.is(tok::greater)) { + endLoc = ConsumeAnyToken(); + return false; + } + Diag(Tok, diag::err_expected_greater); + return true; +} + +/// objc-class-instance-variables: +/// '{' objc-instance-variable-decl-list[opt] '}' +/// +/// objc-instance-variable-decl-list: +/// objc-visibility-spec +/// objc-instance-variable-decl ';' +/// ';' +/// objc-instance-variable-decl-list objc-visibility-spec +/// objc-instance-variable-decl-list objc-instance-variable-decl ';' +/// objc-instance-variable-decl-list ';' +/// +/// objc-visibility-spec: +/// @private +/// @protected +/// @public +/// @package [OBJC2] +/// +/// objc-instance-variable-decl: +/// struct-declaration +/// +void Parser::ParseObjCClassInstanceVariables(DeclTy *interfaceDecl, + SourceLocation atLoc) { + assert(Tok.is(tok::l_brace) && "expected {"); + llvm::SmallVector<DeclTy*, 16> IvarDecls; + llvm::SmallVector<DeclTy*, 32> AllIvarDecls; + llvm::SmallVector<tok::ObjCKeywordKind, 32> AllVisibilities; + + SourceLocation LBraceLoc = ConsumeBrace(); // the "{" + + tok::ObjCKeywordKind visibility = tok::objc_private; + // While we still have something to read, read the instance variables. + while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { + // Each iteration of this loop reads one objc-instance-variable-decl. + + // Check for extraneous top-level semicolon. + if (Tok.is(tok::semi)) { + Diag(Tok, diag::ext_extra_struct_semi); + ConsumeToken(); + continue; + } + // Set the default visibility to private. 
+ if (Tok.is(tok::at)) { // parse objc-visibility-spec + ConsumeToken(); // eat the @ sign + switch (Tok.getObjCKeywordID()) { + case tok::objc_private: + case tok::objc_public: + case tok::objc_protected: + case tok::objc_package: + visibility = Tok.getObjCKeywordID(); + ConsumeToken(); + continue; + default: + Diag(Tok, diag::err_objc_illegal_visibility_spec); + ConsumeToken(); + continue; + } + } + ParseStructDeclaration(interfaceDecl, IvarDecls); + for (unsigned i = 0; i < IvarDecls.size(); i++) { + AllIvarDecls.push_back(IvarDecls[i]); + AllVisibilities.push_back(visibility); + } + IvarDecls.clear(); + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } else if (Tok.is(tok::r_brace)) { + Diag(Tok.getLocation(), diag::ext_expected_semi_decl_list); + break; + } else { + Diag(Tok, diag::err_expected_semi_decl_list); + // Skip to end of block or statement + SkipUntil(tok::r_brace, true, true); + } + } + SourceLocation RBraceLoc = MatchRHSPunctuation(tok::r_brace, LBraceLoc); + // Call ActOnFields() even if we don't have any decls. This is useful + // for code rewriting tools that need to be aware of the empty list. + Actions.ActOnFields(CurScope, atLoc, interfaceDecl, + &AllIvarDecls[0], AllIvarDecls.size(), + LBraceLoc, RBraceLoc, &AllVisibilities[0]); + return; +} + +/// objc-protocol-declaration: +/// objc-protocol-definition +/// objc-protocol-forward-reference +/// +/// objc-protocol-definition: +/// @protocol identifier +/// objc-protocol-refs[opt] +/// objc-interface-decl-list +/// @end +/// +/// objc-protocol-forward-reference: +/// @protocol identifier-list ';' +/// +/// "@protocol identifier ;" should be resolved as "@protocol +/// identifier-list ;": objc-interface-decl-list may not start with a +/// semicolon in the first alternative if objc-protocol-refs are omitted. 
+ +Parser::DeclTy *Parser::ParseObjCAtProtocolDeclaration(SourceLocation AtLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_protocol) && + "ParseObjCAtProtocolDeclaration(): Expected @protocol"); + ConsumeToken(); // the "protocol" identifier + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing protocol name. + return 0; + } + // Save the protocol name, then consume it. + IdentifierInfo *protocolName = Tok.getIdentifierInfo(); + SourceLocation nameLoc = ConsumeToken(); + + llvm::SmallVector<IdentifierInfo *, 8> ProtocolRefs; + if (Tok.is(tok::semi)) { // forward declaration of one protocol. + ConsumeToken(); + ProtocolRefs.push_back(protocolName); + } + if (Tok.is(tok::comma)) { // list of forward declarations. + // Parse the list of forward declarations. + ProtocolRefs.push_back(protocolName); + + while (1) { + ConsumeToken(); // the ',' + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::semi); + return 0; + } + ProtocolRefs.push_back(Tok.getIdentifierInfo()); + ConsumeToken(); // the identifier + + if (Tok.isNot(tok::comma)) + break; + } + // Consume the ';'. + if (ExpectAndConsume(tok::semi, diag::err_expected_semi_after, "@protocol")) + return 0; + } + if (ProtocolRefs.size() > 0) + return Actions.ActOnForwardProtocolDeclaration(AtLoc, + &ProtocolRefs[0], + ProtocolRefs.size()); + // Last, and definitely not least, parse a protocol declaration. + SourceLocation endProtoLoc; + if (Tok.is(tok::less)) { + if (ParseObjCProtocolReferences(ProtocolRefs, endProtoLoc)) + return 0; + } + + DeclTy *ProtoType = Actions.ActOnStartProtocolInterface(AtLoc, + protocolName, nameLoc, + &ProtocolRefs[0], + ProtocolRefs.size(), endProtoLoc); + ParseObjCInterfaceDeclList(ProtoType, tok::objc_protocol); + + // The @ sign was already consumed by ParseObjCInterfaceDeclList(). 
+ if (Tok.isObjCAtKeyword(tok::objc_end)) { + ConsumeToken(); // the "end" identifier + return ProtoType; + } + Diag(Tok, diag::err_objc_missing_end); + return 0; +} + +/// objc-implementation: +/// objc-class-implementation-prologue +/// objc-category-implementation-prologue +/// +/// objc-class-implementation-prologue: +/// @implementation identifier objc-superclass[opt] +/// objc-class-instance-variables[opt] +/// +/// objc-category-implementation-prologue: +/// @implementation identifier ( identifier ) + +Parser::DeclTy *Parser::ParseObjCAtImplementationDeclaration( + SourceLocation atLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_implementation) && + "ParseObjCAtImplementationDeclaration(): Expected @implementation"); + ConsumeToken(); // the "implementation" identifier + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing class or category name. + return 0; + } + // We have a class or category name - consume it. + IdentifierInfo *nameId = Tok.getIdentifierInfo(); + SourceLocation nameLoc = ConsumeToken(); // consume class or category name + + if (Tok.is(tok::l_paren)) { + // we have a category implementation. + SourceLocation lparenLoc = ConsumeParen(); + SourceLocation categoryLoc, rparenLoc; + IdentifierInfo *categoryId = 0; + + if (Tok.is(tok::identifier)) { + categoryId = Tok.getIdentifierInfo(); + categoryLoc = ConsumeToken(); + } else { + Diag(Tok, diag::err_expected_ident); // missing category name. 
+ return 0; + } + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected_rparen); + SkipUntil(tok::r_paren, false); // don't stop at ';' + return 0; + } + rparenLoc = ConsumeParen(); + DeclTy *ImplCatType = Actions.ActOnStartCategoryImplementation( + atLoc, nameId, nameLoc, categoryId, + categoryLoc); + ObjCImpDecl = ImplCatType; + return 0; + } + // We have a class implementation + SourceLocation superClassLoc; + IdentifierInfo *superClassId = 0; + if (Tok.is(tok::colon)) { + // We have a super class + ConsumeToken(); + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); // missing super class name. + return 0; + } + superClassId = Tok.getIdentifierInfo(); + superClassLoc = ConsumeToken(); // Consume super class name + } + DeclTy *ImplClsType = Actions.ActOnStartClassImplementation( + atLoc, nameId, nameLoc, + superClassId, superClassLoc); + + if (Tok.is(tok::l_brace)) // we have ivars + ParseObjCClassInstanceVariables(ImplClsType/*FIXME*/, atLoc); + ObjCImpDecl = ImplClsType; + + return 0; +} + +Parser::DeclTy *Parser::ParseObjCAtEndDeclaration(SourceLocation atLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_end) && + "ParseObjCAtEndDeclaration(): Expected @end"); + ConsumeToken(); // the "end" identifier + if (ObjCImpDecl) + Actions.ActOnAtEnd(atLoc, ObjCImpDecl); + else + Diag(atLoc, diag::warn_expected_implementation); // missing @implementation + return ObjCImpDecl; +} + +/// compatibility-alias-decl: +/// @compatibility_alias alias-name class-name ';' +/// +Parser::DeclTy *Parser::ParseObjCAtAliasDeclaration(SourceLocation atLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_compatibility_alias) && + "ParseObjCAtAliasDeclaration(): Expected @compatibility_alias"); + ConsumeToken(); // consume compatibility_alias + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return 0; + } + IdentifierInfo *aliasId = Tok.getIdentifierInfo(); + SourceLocation aliasLoc = ConsumeToken(); // consume alias-name + if 
(Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return 0; + } + IdentifierInfo *classId = Tok.getIdentifierInfo(); + SourceLocation classLoc = ConsumeToken(); // consume class-name; + if (Tok.isNot(tok::semi)) { + Diag(Tok, diag::err_expected_semi_after, "@compatibility_alias"); + return 0; + } + DeclTy *ClsType = Actions.ActOnCompatiblityAlias(atLoc, + aliasId, aliasLoc, + classId, classLoc); + return ClsType; +} + +/// property-synthesis: +/// @synthesize property-ivar-list ';' +/// +/// property-ivar-list: +/// property-ivar +/// property-ivar-list ',' property-ivar +/// +/// property-ivar: +/// identifier +/// identifier '=' identifier +/// +/// This only checks syntax: the property and ivar names are consumed but not +/// recorded, nothing is passed to Actions, and the result is always 0. The +/// terminating ';' is checked for but not consumed here. +/// +Parser::DeclTy *Parser::ParseObjCPropertySynthesize(SourceLocation atLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_synthesize) && + "ParseObjCPropertySynthesize(): Expected '@synthesize'"); + ConsumeToken(); // consume synthesize + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return 0; + } + while (Tok.is(tok::identifier)) { + ConsumeToken(); // consume property name + if (Tok.is(tok::equal)) { + // property '=' ivar-name + ConsumeToken(); // consume '=' + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + break; + } + ConsumeToken(); // consume ivar-name + } + if (Tok.isNot(tok::comma)) + break; + ConsumeToken(); // consume ',' + } + if (Tok.isNot(tok::semi)) + Diag(Tok, diag::err_expected_semi_after, "@synthesize"); + return 0; +} + +/// property-dynamic: +/// @dynamic property-list +/// +/// property-list: +/// identifier +/// property-list ',' identifier +/// +Parser::DeclTy *Parser::ParseObjCPropertyDynamic(SourceLocation atLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_dynamic) && + "ParseObjCPropertyDynamic(): Expected '@dynamic'"); + SourceLocation loc = ConsumeToken(); // consume dynamic + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return 0; + } + while (Tok.is(tok::identifier)) { + 
ConsumeToken(); // consume property name + if (Tok.isNot(tok::comma)) + break; + ConsumeToken(); // consume ',' + } + if (Tok.isNot(tok::semi)) + Diag(Tok, diag::err_expected_semi_after, "@dynamic"); + return 0; +} + +/// objc-throw-statement: +/// throw expression[opt]; +/// +Parser::StmtResult Parser::ParseObjCThrowStmt(SourceLocation atLoc) { + ExprResult Res; + ConsumeToken(); // consume throw + if (Tok.isNot(tok::semi)) { + Res = ParseExpression(); + if (Res.isInvalid) { + SkipUntil(tok::semi); + return true; + } + } + ConsumeToken(); // consume ';' + return Actions.ActOnObjCAtThrowStmt(atLoc, Res.Val); +} + +/// objc-synchronized-statement: +/// @synchronized '(' expression ')' compound-statement +/// +Parser::StmtResult Parser::ParseObjCSynchronizedStmt(SourceLocation atLoc) { + ConsumeToken(); // consume synchronized + if (Tok.isNot(tok::l_paren)) { + Diag (Tok, diag::err_expected_lparen_after, "@synchronized"); + return true; + } + ConsumeParen(); // '(' + ExprResult Res = ParseExpression(); + if (Res.isInvalid) { + SkipUntil(tok::semi); + return true; + } + if (Tok.isNot(tok::r_paren)) { + Diag (Tok, diag::err_expected_lbrace); + return true; + } + ConsumeParen(); // ')' + if (Tok.isNot(tok::l_brace)) { + Diag (Tok, diag::err_expected_lbrace); + return true; + } + StmtResult SynchBody = ParseCompoundStatementBody(); + if (SynchBody.isInvalid) + SynchBody = Actions.ActOnNullStmt(Tok.getLocation()); + return Actions.ActOnObjCAtSynchronizedStmt(atLoc, Res.Val, SynchBody.Val); +} + +/// objc-try-catch-statement: +/// @try compound-statement objc-catch-list[opt] +/// @try compound-statement objc-catch-list[opt] @finally compound-statement +/// +/// objc-catch-list: +/// @catch ( parameter-declaration ) compound-statement +/// objc-catch-list @catch ( catch-parameter-declaration ) compound-statement +/// catch-parameter-declaration: +/// parameter-declaration +/// '...' 
[OBJC2] +/// +Parser::StmtResult Parser::ParseObjCTryStmt(SourceLocation atLoc) { + bool catch_or_finally_seen = false; + + ConsumeToken(); // consume try + if (Tok.isNot(tok::l_brace)) { + Diag (Tok, diag::err_expected_lbrace); + return true; + } + StmtResult CatchStmts; + StmtResult FinallyStmt; + StmtResult TryBody = ParseCompoundStatementBody(); + if (TryBody.isInvalid) + TryBody = Actions.ActOnNullStmt(Tok.getLocation()); + + while (Tok.is(tok::at)) { + // At this point, we need to lookahead to determine if this @ is the start + // of an @catch or @finally. We don't want to consume the @ token if this + // is an @try or @encode or something else. + Token AfterAt = GetLookAheadToken(1); + if (!AfterAt.isObjCAtKeyword(tok::objc_catch) && + !AfterAt.isObjCAtKeyword(tok::objc_finally)) + break; + + SourceLocation AtCatchFinallyLoc = ConsumeToken(); + if (Tok.isObjCAtKeyword(tok::objc_catch)) { + StmtTy *FirstPart = 0; + ConsumeToken(); // consume catch + if (Tok.is(tok::l_paren)) { + ConsumeParen(); + EnterScope(Scope::DeclScope); + if (Tok.isNot(tok::ellipsis)) { + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + // FIXME: Is BlockContext right? + Declarator DeclaratorInfo(DS, Declarator::BlockContext); + ParseDeclarator(DeclaratorInfo); + DeclTy *aBlockVarDecl = Actions.ActOnDeclarator(CurScope, + DeclaratorInfo, 0); + StmtResult stmtResult = + Actions.ActOnDeclStmt(aBlockVarDecl, DS.getSourceRange().getBegin(), + DeclaratorInfo.getSourceRange().getEnd()); + FirstPart = stmtResult.isInvalid ? 0 : stmtResult.Val; + } else + ConsumeToken(); // consume '...' 
+ SourceLocation RParenLoc = ConsumeParen(); + + StmtResult CatchBody(true); + if (Tok.is(tok::l_brace)) + CatchBody = ParseCompoundStatementBody(); + else + Diag(Tok, diag::err_expected_lbrace); + if (CatchBody.isInvalid) + CatchBody = Actions.ActOnNullStmt(Tok.getLocation()); + CatchStmts = Actions.ActOnObjCAtCatchStmt(AtCatchFinallyLoc, RParenLoc, + FirstPart, CatchBody.Val, CatchStmts.Val); + ExitScope(); + } else { + Diag(AtCatchFinallyLoc, diag::err_expected_lparen_after, + "@catch clause"); + return true; + } + catch_or_finally_seen = true; + } else { + assert(Tok.isObjCAtKeyword(tok::objc_finally) && "Lookahead confused?"); + ConsumeToken(); // consume finally + + StmtResult FinallyBody(true); + if (Tok.is(tok::l_brace)) + FinallyBody = ParseCompoundStatementBody(); + else + Diag(Tok, diag::err_expected_lbrace); + if (FinallyBody.isInvalid) + FinallyBody = Actions.ActOnNullStmt(Tok.getLocation()); + FinallyStmt = Actions.ActOnObjCAtFinallyStmt(AtCatchFinallyLoc, + FinallyBody.Val); + catch_or_finally_seen = true; + break; + } + } + if (!catch_or_finally_seen) { + Diag(atLoc, diag::err_missing_catch_finally); + return true; + } + return Actions.ActOnObjCAtTryStmt(atLoc, TryBody.Val, CatchStmts.Val, + FinallyStmt.Val); +} + +/// objc-method-def: objc-method-proto ';'[opt] '{' body '}' +/// +Parser::DeclTy *Parser::ParseObjCMethodDefinition() { + DeclTy *MDecl = ParseObjCMethodPrototype(ObjCImpDecl); + // parse optional ';' + if (Tok.is(tok::semi)) + ConsumeToken(); + + // We should have an opening brace now. + if (Tok.isNot(tok::l_brace)) { + Diag(Tok, diag::err_expected_method_body); + + // Skip over garbage, until we get to '{'. Don't eat the '{'. + SkipUntil(tok::l_brace, true, true); + + // If we didn't find the '{', bail out. + if (Tok.isNot(tok::l_brace)) + return 0; + } + SourceLocation BraceLoc = Tok.getLocation(); + + // Enter a scope for the method body. 
+ EnterScope(Scope::FnScope|Scope::DeclScope); + + // Tell the actions module that we have entered a method definition with the + // specified Declarator for the method. + Actions.ObjCActOnStartOfMethodDef(CurScope, MDecl); + + StmtResult FnBody = ParseCompoundStatementBody(); + + // If the function body could not be parsed, make a bogus compoundstmt. + if (FnBody.isInvalid) + FnBody = Actions.ActOnCompoundStmt(BraceLoc, BraceLoc, 0, 0, false); + + // Leave the function body scope. + ExitScope(); + + // TODO: Pass argument information. + Actions.ActOnFinishFunctionBody(MDecl, FnBody.Val); + return MDecl; +} + +Parser::StmtResult Parser::ParseObjCAtStatement(SourceLocation AtLoc) { + if (Tok.isObjCAtKeyword(tok::objc_try)) { + return ParseObjCTryStmt(AtLoc); + } else if (Tok.isObjCAtKeyword(tok::objc_throw)) + return ParseObjCThrowStmt(AtLoc); + else if (Tok.isObjCAtKeyword(tok::objc_synchronized)) + return ParseObjCSynchronizedStmt(AtLoc); + ExprResult Res = ParseExpressionWithLeadingAt(AtLoc); + if (Res.isInvalid) { + // If the expression is invalid, skip ahead to the next semicolon. Not + // doing this opens us up to the possibility of infinite loops if + // ParseExpression does not consume any tokens. + SkipUntil(tok::semi); + return true; + } + // Otherwise, eat the semicolon. 
+ ExpectAndConsume(tok::semi, diag::err_expected_semi_after_expr); + return Actions.ActOnExprStmt(Res.Val); +} + +/// ParseObjCAtExpression - Parse the expression that follows an '@' located at +/// AtLoc: a (wide) string literal, @encode, @protocol or @selector, each +/// followed by any postfix-expression suffix. Anything else is diagnosed with +/// err_unexpected_at, tokens are skipped with SkipUntil(tok::semi), and true +/// (an invalid result) is returned. +/// +Parser::ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) { + + switch (Tok.getKind()) { + case tok::string_literal: // primary-expression: string-literal + case tok::wide_string_literal: + return ParsePostfixExpressionSuffix(ParseObjCStringLiteral(AtLoc)); + default: + break; + } + + // A token without identifier info (presumably punctuation, numeric + // literals, ...) cannot be an '@' keyword; reject it here rather than + // dereferencing a null IdentifierInfo below. + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (!II) { + Diag(AtLoc, diag::err_unexpected_at); + SkipUntil(tok::semi); + return true; + } + + switch (II->getObjCKeywordID()) { + case tok::objc_encode: + return ParsePostfixExpressionSuffix(ParseObjCEncodeExpression(AtLoc)); + case tok::objc_protocol: + return ParsePostfixExpressionSuffix(ParseObjCProtocolExpression(AtLoc)); + case tok::objc_selector: + return ParsePostfixExpressionSuffix(ParseObjCSelectorExpression(AtLoc)); + default: + Diag(AtLoc, diag::err_unexpected_at); + SkipUntil(tok::semi); + return true; + } +} + +/// objc-message-expr: +/// '[' objc-receiver objc-message-args ']' +/// +/// objc-receiver: +/// expression +/// class-name +/// type-name +Parser::ExprResult Parser::ParseObjCMessageExpression() { + assert(Tok.is(tok::l_square) && "'[' expected"); + SourceLocation LBracLoc = ConsumeBracket(); // consume '[' + + // Parse receiver + if (isTokObjCMessageIdentifierReceiver()) { + IdentifierInfo *ReceiverName = Tok.getIdentifierInfo(); + ConsumeToken(); + return ParseObjCMessageExpressionBody(LBracLoc, ReceiverName, 0); + } + + ExprResult Res = ParseAssignmentExpression(); + if (Res.isInvalid) { + Diag(Tok, diag::err_invalid_receiver_to_message); + SkipUntil(tok::r_square); + return Res; + } + return ParseObjCMessageExpressionBody(LBracLoc, 0, Res.Val); +} + +/// ParseObjCMessageExpressionBody - Having parsed "'[' objc-receiver", parse +/// the rest of a message expression. 
+/// +/// objc-message-args: +/// objc-selector +/// objc-keywordarg-list +/// +/// objc-keywordarg-list: +/// objc-keywordarg +/// objc-keywordarg-list objc-keywordarg +/// +/// objc-keywordarg: +/// selector-name[opt] ':' objc-keywordexpr +/// +/// objc-keywordexpr: +/// nonempty-expr-list +/// +/// nonempty-expr-list: +/// assignment-expression +/// nonempty-expr-list , assignment-expression +/// +Parser::ExprResult +Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc, + IdentifierInfo *ReceiverName, + ExprTy *ReceiverExpr) { + // Parse objc-selector + SourceLocation Loc; + IdentifierInfo *selIdent = ParseObjCSelector(Loc); + + llvm::SmallVector<IdentifierInfo *, 12> KeyIdents; + llvm::SmallVector<Action::ExprTy *, 12> KeyExprs; + + if (Tok.is(tok::colon)) { + while (1) { + // Each iteration parses a single keyword argument. + KeyIdents.push_back(selIdent); + + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon); + SkipUntil(tok::semi); + return true; + } + ConsumeToken(); // Eat the ':'. + /// Parse the expression after ':' + ExprResult Res = ParseAssignmentExpression(); + if (Res.isInvalid) { + SkipUntil(tok::identifier); + return Res; + } + // We have a valid expression. + KeyExprs.push_back(Res.Val); + + // Check for another keyword selector. + selIdent = ParseObjCSelector(Loc); + if (!selIdent && Tok.isNot(tok::colon)) + break; + // We have a selector or a colon, continue parsing. + } + // Parse the, optional, argument list, comma separated. + while (Tok.is(tok::comma)) { + ConsumeToken(); // Eat the ','. + /// Parse the expression after ',' + ExprResult Res = ParseAssignmentExpression(); + if (Res.isInvalid) { + SkipUntil(tok::identifier); + return Res; + } + // We have a valid expression. + KeyExprs.push_back(Res.Val); + } + } else if (!selIdent) { + Diag(Tok, diag::err_expected_ident); // missing selector name. 
+ SkipUntil(tok::semi); + return true; + } + + if (Tok.isNot(tok::r_square)) { + Diag(Tok, diag::err_expected_rsquare); + SkipUntil(tok::semi); + return true; + } + SourceLocation RBracLoc = ConsumeBracket(); // consume ']' + + unsigned nKeys = KeyIdents.size(); + if (nKeys == 0) + KeyIdents.push_back(selIdent); + Selector Sel = PP.getSelectorTable().getSelector(nKeys, &KeyIdents[0]); + + // We've just parsed a keyword message. + if (ReceiverName) + return Actions.ActOnClassMessage(CurScope, + ReceiverName, Sel, LBracLoc, RBracLoc, + &KeyExprs[0], KeyExprs.size()); + return Actions.ActOnInstanceMessage(ReceiverExpr, Sel, LBracLoc, RBracLoc, + &KeyExprs[0], KeyExprs.size()); +} + +Parser::ExprResult Parser::ParseObjCStringLiteral(SourceLocation AtLoc) { + ExprResult Res = ParseStringLiteralExpression(); + if (Res.isInvalid) return Res; + + // @"foo" @"bar" is a valid concatenated string. Eat any subsequent string + // expressions. At this point, we know that the only valid thing that starts + // with '@' is an @"". + llvm::SmallVector<SourceLocation, 4> AtLocs; + llvm::SmallVector<ExprTy*, 4> AtStrings; + AtLocs.push_back(AtLoc); + AtStrings.push_back(Res.Val); + + while (Tok.is(tok::at)) { + AtLocs.push_back(ConsumeToken()); // eat the @. + + ExprResult Res(true); // Invalid unless there is a string literal. 
+ if (isTokenStringLiteral()) + Res = ParseStringLiteralExpression(); + else + Diag(Tok, diag::err_objc_concat_string); + + if (Res.isInvalid) { + while (!AtStrings.empty()) { + Actions.DeleteExpr(AtStrings.back()); + AtStrings.pop_back(); + } + return Res; + } + + AtStrings.push_back(Res.Val); + } + + return Actions.ParseObjCStringLiteral(&AtLocs[0], &AtStrings[0], + AtStrings.size()); +} + +/// objc-encode-expression: +/// @encode ( type-name ) +Parser::ExprResult Parser::ParseObjCEncodeExpression(SourceLocation AtLoc) { + assert(Tok.isObjCAtKeyword(tok::objc_encode) && "Not an @encode expression!"); + + SourceLocation EncLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "@encode"); + return true; + } + + SourceLocation LParenLoc = ConsumeParen(); + + TypeTy *Ty = ParseTypeName(); + + SourceLocation RParenLoc = MatchRHSPunctuation(tok::r_paren, LParenLoc); + + return Actions.ParseObjCEncodeExpression(AtLoc, EncLoc, LParenLoc, Ty, + RParenLoc); +} + +/// objc-protocol-expression +/// @protocol ( protocol-name ) + +Parser::ExprResult Parser::ParseObjCProtocolExpression(SourceLocation AtLoc) +{ + SourceLocation ProtoLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "@protocol"); + return true; + } + + SourceLocation LParenLoc = ConsumeParen(); + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + return true; + } + IdentifierInfo *protocolId = Tok.getIdentifierInfo(); + ConsumeToken(); + + SourceLocation RParenLoc = MatchRHSPunctuation(tok::r_paren, LParenLoc); + + return Actions.ParseObjCProtocolExpression(protocolId, AtLoc, ProtoLoc, + LParenLoc, RParenLoc); +} + +/// objc-selector-expression +/// @selector '(' objc-keyword-selector ')' +Parser::ExprResult Parser::ParseObjCSelectorExpression(SourceLocation AtLoc) +{ + SourceLocation SelectorLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, 
diag::err_expected_lparen_after, "@selector"); + return 0; + } + + llvm::SmallVector<IdentifierInfo *, 12> KeyIdents; + SourceLocation LParenLoc = ConsumeParen(); + SourceLocation sLoc; + IdentifierInfo *SelIdent = ParseObjCSelector(sLoc); + if (!SelIdent && Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_ident); // missing selector name. + return 0; + } + KeyIdents.push_back(SelIdent); + unsigned nColons = 0; + if (Tok.isNot(tok::r_paren)) { + while (1) { + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon); + break; + } + nColons++; + ConsumeToken(); // Eat the ':'. + if (Tok.is(tok::r_paren)) + break; + // Check for another keyword selector. + SourceLocation Loc; + SelIdent = ParseObjCSelector(Loc); + KeyIdents.push_back(SelIdent); + if (!SelIdent && Tok.isNot(tok::colon)) + break; + } + } + SourceLocation RParenLoc = MatchRHSPunctuation(tok::r_paren, LParenLoc); + Selector Sel = PP.getSelectorTable().getSelector(nColons, &KeyIdents[0]); + return Actions.ParseObjCSelectorExpression(Sel, AtLoc, SelectorLoc, LParenLoc, + RParenLoc); + } diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp new file mode 100644 index 00000000000..f9800219ed5 --- /dev/null +++ b/clang/lib/Parse/ParseStmt.cpp @@ -0,0 +1,1159 @@ +//===--- ParseStmt.cpp - Statement and Block Parser -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Statement and Block portions of the Parser +// interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// C99 6.8: Statements and Blocks. +//===----------------------------------------------------------------------===// + +/// ParseStatementOrDeclaration - Read 'statement' or 'declaration'. +/// StatementOrDeclaration: +/// statement +/// declaration +/// +/// statement: +/// labeled-statement +/// compound-statement +/// expression-statement +/// selection-statement +/// iteration-statement +/// jump-statement +/// [OBC] objc-throw-statement +/// [OBC] objc-try-catch-statement +/// [OBC] objc-synchronized-statement +/// [GNU] asm-statement +/// [OMP] openmp-construct [TODO] +/// +/// labeled-statement: +/// identifier ':' statement +/// 'case' constant-expression ':' statement +/// 'default' ':' statement +/// +/// selection-statement: +/// if-statement +/// switch-statement +/// +/// iteration-statement: +/// while-statement +/// do-statement +/// for-statement +/// +/// expression-statement: +/// expression[opt] ';' +/// +/// jump-statement: +/// 'goto' identifier ';' +/// 'continue' ';' +/// 'break' ';' +/// 'return' expression[opt] ';' +/// [GNU] 'goto' '*' expression ';' +/// +/// [OBC] objc-throw-statement: +/// [OBC] '@' 'throw' expression ';' +/// [OBC] '@' 'throw' ';' +/// +Parser::StmtResult Parser::ParseStatementOrDeclaration(bool OnlyStatement) { + const char *SemiError = 0; + Parser::StmtResult Res; + + // Cases in this switch statement should fall through if the parser expects + // the token to end in a semicolon (in which case SemiError should be set), + // or they directly 'return;' if not. 
+ tok::TokenKind Kind = Tok.getKind(); + SourceLocation AtLoc; + switch (Kind) { + case tok::identifier: // C99 6.8.1: labeled-statement + // identifier ':' statement + // declaration (if !OnlyStatement) + // expression[opt] ';' + return ParseIdentifierStatement(OnlyStatement); + + case tok::at: // May be a @try or @throw statement + { + AtLoc = ConsumeToken(); // consume @ + return ParseObjCAtStatement(AtLoc); + } + + default: + if (!OnlyStatement && isDeclarationSpecifier()) { + SourceLocation DeclStart = Tok.getLocation(); + DeclTy *Res = ParseDeclaration(Declarator::BlockContext); + // FIXME: Pass in the right location for the end of the declstmt. + return Actions.ActOnDeclStmt(Res, DeclStart, DeclStart); + } else if (Tok.is(tok::r_brace)) { + Diag(Tok, diag::err_expected_statement); + return true; + } else { + // expression[opt] ';' + ExprResult Res = ParseExpression(); + if (Res.isInvalid) { + // If the expression is invalid, skip ahead to the next semicolon. Not + // doing this opens us up to the possibility of infinite loops if + // ParseExpression does not consume any tokens. + SkipUntil(tok::semi); + return true; + } + // Otherwise, eat the semicolon. 
+ ExpectAndConsume(tok::semi, diag::err_expected_semi_after_expr); + return Actions.ActOnExprStmt(Res.Val); + } + + case tok::kw_case: // C99 6.8.1: labeled-statement + return ParseCaseStatement(); + case tok::kw_default: // C99 6.8.1: labeled-statement + return ParseDefaultStatement(); + + case tok::l_brace: // C99 6.8.2: compound-statement + return ParseCompoundStatement(); + case tok::semi: // C99 6.8.3p3: expression[opt] ';' + return Actions.ActOnNullStmt(ConsumeToken()); + + case tok::kw_if: // C99 6.8.4.1: if-statement + return ParseIfStatement(); + case tok::kw_switch: // C99 6.8.4.2: switch-statement + return ParseSwitchStatement(); + + case tok::kw_while: // C99 6.8.5.1: while-statement + return ParseWhileStatement(); + case tok::kw_do: // C99 6.8.5.2: do-statement + Res = ParseDoStatement(); + SemiError = "do/while loop"; + break; + case tok::kw_for: // C99 6.8.5.3: for-statement + return ParseForStatement(); + + case tok::kw_goto: // C99 6.8.6.1: goto-statement + Res = ParseGotoStatement(); + SemiError = "goto statement"; + break; + case tok::kw_continue: // C99 6.8.6.2: continue-statement + Res = ParseContinueStatement(); + SemiError = "continue statement"; + break; + case tok::kw_break: // C99 6.8.6.3: break-statement + Res = ParseBreakStatement(); + SemiError = "break statement"; + break; + case tok::kw_return: // C99 6.8.6.4: return-statement + Res = ParseReturnStatement(); + SemiError = "return statement"; + break; + + case tok::kw_asm: + bool msAsm = false; + Res = ParseAsmStatement(msAsm); + if (msAsm) return Res; + SemiError = "asm statement"; + break; + } + + // If we reached this code, the statement must end in a semicolon. + if (Tok.is(tok::semi)) { + ConsumeToken(); + } else { + Diag(Tok, diag::err_expected_semi_after, SemiError); + SkipUntil(tok::semi); + } + return Res; +} + +/// ParseIdentifierStatement - Because we don't have two-token lookahead, we +/// have a bit of a quandry here. 
Reading the identifier is necessary to see if +/// there is a ':' after it. If there is, this is a label, regardless of what +/// else the identifier can mean. If not, this is either part of a declaration +/// (if the identifier is a type-name) or part of an expression. +/// +/// labeled-statement: +/// identifier ':' statement +/// [GNU] identifier ':' attributes[opt] statement +/// declaration (if !OnlyStatement) +/// expression[opt] ';' +/// +Parser::StmtResult Parser::ParseIdentifierStatement(bool OnlyStatement) { + assert(Tok.is(tok::identifier) && Tok.getIdentifierInfo() && + "Not an identifier!"); + + Token IdentTok = Tok; // Save the whole token. + ConsumeToken(); // eat the identifier. + + // identifier ':' statement + if (Tok.is(tok::colon)) { + SourceLocation ColonLoc = ConsumeToken(); + + // Read label attributes, if present. + DeclTy *AttrList = 0; + if (Tok.is(tok::kw___attribute)) + // TODO: save these somewhere. + AttrList = ParseAttributes(); + + StmtResult SubStmt = ParseStatement(); + + // Broken substmt shouldn't prevent the label from being added to the AST. + if (SubStmt.isInvalid) + SubStmt = Actions.ActOnNullStmt(ColonLoc); + + return Actions.ActOnLabelStmt(IdentTok.getLocation(), + IdentTok.getIdentifierInfo(), + ColonLoc, SubStmt.Val); + } + + // Check to see if this is a declaration. + void *TypeRep; + if (!OnlyStatement && + (TypeRep = Actions.isTypeName(*IdentTok.getIdentifierInfo(), CurScope))) { + // Handle this. Warn/disable if in middle of block and !C99. + DeclSpec DS; + + // Add the typedef name to the start of the decl-specs. 
+ const char *PrevSpec = 0; + int isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typedef, + IdentTok.getLocation(), PrevSpec, + TypeRep); + assert(!isInvalid && "First declspec can't be invalid!"); + SourceLocation endProtoLoc; + if (Tok.is(tok::less)) { + llvm::SmallVector<IdentifierInfo *, 8> ProtocolRefs; + ParseObjCProtocolReferences(ProtocolRefs, endProtoLoc); + llvm::SmallVector<DeclTy *, 8> *ProtocolDecl = + new llvm::SmallVector<DeclTy *, 8>; + DS.setProtocolQualifiers(ProtocolDecl); + Actions.FindProtocolDeclaration(IdentTok.getLocation(), + &ProtocolRefs[0], ProtocolRefs.size(), + *ProtocolDecl); + } + + // ParseDeclarationSpecifiers will continue from there. + ParseDeclarationSpecifiers(DS); + + // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" + // declaration-specifiers init-declarator-list[opt] ';' + if (Tok.is(tok::semi)) { + // TODO: emit error on 'int;' or 'const enum foo;'. + // if (!DS.isMissingDeclaratorOk()) Diag(...); + + ConsumeToken(); + // FIXME: Return this as a type decl. + return 0; + } + + // Parse all the declarators. + Declarator DeclaratorInfo(DS, Declarator::BlockContext); + ParseDeclarator(DeclaratorInfo); + + DeclTy *Decl = ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo); + if (!Decl) return 0; + return Actions.ActOnDeclStmt(Decl, DS.getSourceRange().getBegin(), + DeclaratorInfo.getSourceRange().getEnd()); + } + + // Otherwise, this is an expression. Seed it with II and parse it. + ExprResult Res = ParseExpressionWithLeadingIdentifier(IdentTok); + if (Res.isInvalid) { + SkipUntil(tok::semi); + return true; + } else if (Tok.isNot(tok::semi)) { + Diag(Tok, diag::err_expected_semi_after, "expression"); + SkipUntil(tok::semi); + return true; + } else { + ConsumeToken(); + // Convert expr to a stmt. + return Actions.ActOnExprStmt(Res.Val); + } +} + +/// ParseCaseStatement +/// labeled-statement: +/// 'case' constant-expression ':' statement +/// [GNU] 'case' constant-expression '...' 
constant-expression ':' statement +/// +/// Note that this does not parse the 'statement' at the end. +/// +Parser::StmtResult Parser::ParseCaseStatement() { + assert(Tok.is(tok::kw_case) && "Not a case stmt!"); + SourceLocation CaseLoc = ConsumeToken(); // eat the 'case'. + + ExprResult LHS = ParseConstantExpression(); + if (LHS.isInvalid) { + SkipUntil(tok::colon); + return true; + } + + // GNU case range extension. + SourceLocation DotDotDotLoc; + ExprTy *RHSVal = 0; + if (Tok.is(tok::ellipsis)) { + Diag(Tok, diag::ext_gnu_case_range); + DotDotDotLoc = ConsumeToken(); + + ExprResult RHS = ParseConstantExpression(); + if (RHS.isInvalid) { + SkipUntil(tok::colon); + return true; + } + RHSVal = RHS.Val; + } + + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon_after, "'case'"); + SkipUntil(tok::colon); + return true; + } + + SourceLocation ColonLoc = ConsumeToken(); + + // Diagnose the common error "switch (X) { case 4: }", which is not valid. + if (Tok.is(tok::r_brace)) { + Diag(Tok, diag::err_label_end_of_compound_statement); + return true; + } + + StmtResult SubStmt = ParseStatement(); + + // Broken substmt shouldn't prevent the case from being added to the AST. + if (SubStmt.isInvalid) + SubStmt = Actions.ActOnNullStmt(ColonLoc); + + return Actions.ActOnCaseStmt(CaseLoc, LHS.Val, DotDotDotLoc, RHSVal, ColonLoc, + SubStmt.Val); +} + +/// ParseDefaultStatement +/// labeled-statement: +/// 'default' ':' statement +/// Note that this does not parse the 'statement' at the end. +/// +Parser::StmtResult Parser::ParseDefaultStatement() { + assert(Tok.is(tok::kw_default) && "Not a default stmt!"); + SourceLocation DefaultLoc = ConsumeToken(); // eat the 'default'. + + if (Tok.isNot(tok::colon)) { + Diag(Tok, diag::err_expected_colon_after, "'default'"); + SkipUntil(tok::colon); + return true; + } + + SourceLocation ColonLoc = ConsumeToken(); + + // Diagnose the common error "switch (X) {... default: }", which is not valid. 
+ if (Tok.is(tok::r_brace)) { + Diag(Tok, diag::err_label_end_of_compound_statement); + return true; + } + + StmtResult SubStmt = ParseStatement(); + if (SubStmt.isInvalid) + return true; + + return Actions.ActOnDefaultStmt(DefaultLoc, ColonLoc, SubStmt.Val, CurScope); +} + + +/// ParseCompoundStatement - Parse a "{}" block. +/// +/// compound-statement: [C99 6.8.2] +/// { block-item-list[opt] } +/// [GNU] { label-declarations block-item-list } [TODO] +/// +/// block-item-list: +/// block-item +/// block-item-list block-item +/// +/// block-item: +/// declaration +/// [GNU] '__extension__' declaration +/// statement +/// [OMP] openmp-directive [TODO] +/// +/// [GNU] label-declarations: +/// [GNU] label-declaration +/// [GNU] label-declarations label-declaration +/// +/// [GNU] label-declaration: +/// [GNU] '__label__' identifier-list ';' +/// +/// [OMP] openmp-directive: [TODO] +/// [OMP] barrier-directive +/// [OMP] flush-directive +/// +Parser::StmtResult Parser::ParseCompoundStatement(bool isStmtExpr) { + assert(Tok.is(tok::l_brace) && "Not a compount stmt!"); + + // Enter a scope to hold everything within the compound stmt. Compound + // statements can always hold declarations. + EnterScope(Scope::DeclScope); + + // Parse the statements in the body. + StmtResult Body = ParseCompoundStatementBody(isStmtExpr); + + ExitScope(); + return Body; +} + + +/// ParseCompoundStatementBody - Parse a sequence of statements and invoke the +/// ActOnCompoundStmt action. This expects the '{' to be the current token, and +/// consume the '}' at the end of the block. It does not manipulate the scope +/// stack. +Parser::StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { + SourceLocation LBraceLoc = ConsumeBrace(); // eat the '{'. + + // TODO: "__label__ X, Y, Z;" is the GNU "Local Label" extension. These are + // only allowed at the start of a compound stmt regardless of the language. 
+ + llvm::SmallVector<StmtTy*, 32> Stmts; + while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { + StmtResult R; + if (Tok.isNot(tok::kw___extension__)) { + R = ParseStatementOrDeclaration(false); + } else { + // __extension__ can start declarations and it can also be a unary + // operator for expressions. Consume multiple __extension__ markers here + // until we can determine which is which. + SourceLocation ExtLoc = ConsumeToken(); + while (Tok.is(tok::kw___extension__)) + ConsumeToken(); + + // __extension__ silences extension warnings in the subexpression. + bool SavedExtWarn = Diags.getWarnOnExtensions(); + Diags.setWarnOnExtensions(false); + + // If this is the start of a declaration, parse it as such. + if (isDeclarationSpecifier()) { + // FIXME: Save the __extension__ on the decl as a node somehow. + SourceLocation DeclStart = Tok.getLocation(); + DeclTy *Res = ParseDeclaration(Declarator::BlockContext); + // FIXME: Pass in the right location for the end of the declstmt. + R = Actions.ActOnDeclStmt(Res, DeclStart, DeclStart); + + Diags.setWarnOnExtensions(SavedExtWarn); + } else { + // Otherwise this was a unary __extension__ marker. Parse the + // subexpression and add the __extension__ unary op. + ExprResult Res = ParseCastExpression(false); + Diags.setWarnOnExtensions(SavedExtWarn); + + if (Res.isInvalid) { + SkipUntil(tok::semi); + continue; + } + + // Add the __extension__ node to the AST. + Res = Actions.ActOnUnaryOp(ExtLoc, tok::kw___extension__, Res.Val); + if (Res.isInvalid) + continue; + + // Eat the semicolon at the end of stmt and convert the expr into a stmt. + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_expr); + R = Actions.ActOnExprStmt(Res.Val); + } + } + + if (!R.isInvalid && R.Val) + Stmts.push_back(R.Val); + } + + // We broke out of the while loop because we found a '}' or EOF. 
+ if (Tok.isNot(tok::r_brace)) { + Diag(Tok, diag::err_expected_rbrace); + return true; + } + + SourceLocation RBraceLoc = ConsumeBrace(); + return Actions.ActOnCompoundStmt(LBraceLoc, RBraceLoc, + &Stmts[0], Stmts.size(), isStmtExpr); +} + +/// ParseIfStatement +/// if-statement: [C99 6.8.4.1] +/// 'if' '(' expression ')' statement +/// 'if' '(' expression ')' statement 'else' statement +/// +Parser::StmtResult Parser::ParseIfStatement() { + assert(Tok.is(tok::kw_if) && "Not an if stmt!"); + SourceLocation IfLoc = ConsumeToken(); // eat the 'if'. + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "if"); + SkipUntil(tok::semi); + return true; + } + + // C99 6.8.4p3 - In C99, the if statement is a block. This is not + // the case for C90. + if (getLang().C99) + EnterScope(Scope::DeclScope); + + // Parse the condition. + ExprResult CondExp = ParseSimpleParenExpression(); + if (CondExp.isInvalid) { + SkipUntil(tok::semi); + if (getLang().C99) + ExitScope(); + return true; + } + + // C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do this + // if the body isn't a compound statement to avoid push/pop in common cases. + bool NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + // Read the 'then' stmt. + SourceLocation ThenStmtLoc = Tok.getLocation(); + StmtResult ThenStmt = ParseStatement(); + + // Pop the 'if' scope if needed. + if (NeedsInnerScope) ExitScope(); + + // If it has an else, parse it. + SourceLocation ElseLoc; + SourceLocation ElseStmtLoc; + StmtResult ElseStmt(false); + + if (Tok.is(tok::kw_else)) { + ElseLoc = ConsumeToken(); + + // C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do + // this if the body isn't a compound statement to avoid push/pop in common + // cases. 
+ NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + ElseStmtLoc = Tok.getLocation(); + ElseStmt = ParseStatement(); + + // Pop the 'else' scope if needed. + if (NeedsInnerScope) ExitScope(); + } + + if (getLang().C99) + ExitScope(); + + // If the then or else stmt is invalid and the other is valid (and present), + // make turn the invalid one into a null stmt to avoid dropping the other + // part. If both are invalid, return error. + if ((ThenStmt.isInvalid && ElseStmt.isInvalid) || + (ThenStmt.isInvalid && ElseStmt.Val == 0) || + (ThenStmt.Val == 0 && ElseStmt.isInvalid)) { + // Both invalid, or one is invalid and other is non-present: delete cond and + // return error. + Actions.DeleteExpr(CondExp.Val); + return true; + } + + // Now if either are invalid, replace with a ';'. + if (ThenStmt.isInvalid) + ThenStmt = Actions.ActOnNullStmt(ThenStmtLoc); + if (ElseStmt.isInvalid) + ElseStmt = Actions.ActOnNullStmt(ElseStmtLoc); + + return Actions.ActOnIfStmt(IfLoc, CondExp.Val, ThenStmt.Val, + ElseLoc, ElseStmt.Val); +} + +/// ParseSwitchStatement +/// switch-statement: +/// 'switch' '(' expression ')' statement +Parser::StmtResult Parser::ParseSwitchStatement() { + assert(Tok.is(tok::kw_switch) && "Not a switch stmt!"); + SourceLocation SwitchLoc = ConsumeToken(); // eat the 'switch'. + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "switch"); + SkipUntil(tok::semi); + return true; + } + + // C99 6.8.4p3 - In C99, the switch statement is a block. This is + // not the case for C90. Start the switch scope. + if (getLang().C99) + EnterScope(Scope::BreakScope|Scope::DeclScope); + else + EnterScope(Scope::BreakScope); + + // Parse the condition. 
+ ExprResult Cond = ParseSimpleParenExpression(); + + if (Cond.isInvalid) { + ExitScope(); + return true; + } + + StmtResult Switch = Actions.ActOnStartOfSwitchStmt(Cond.Val); + + // C99 6.8.4p3 - In C99, the body of the switch statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do this + // if the body isn't a compound statement to avoid push/pop in common cases. + bool NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + // Read the body statement. + StmtResult Body = ParseStatement(); + + // Pop the body scope if needed. + if (NeedsInnerScope) ExitScope(); + + if (Body.isInvalid) { + Body = Actions.ActOnNullStmt(Tok.getLocation()); + // FIXME: Remove the case statement list from the Switch statement. + } + + ExitScope(); + + return Actions.ActOnFinishSwitchStmt(SwitchLoc, Switch.Val, Body.Val); +} + +/// ParseWhileStatement +/// while-statement: [C99 6.8.5.1] +/// 'while' '(' expression ')' statement +Parser::StmtResult Parser::ParseWhileStatement() { + assert(Tok.is(tok::kw_while) && "Not a while stmt!"); + SourceLocation WhileLoc = Tok.getLocation(); + ConsumeToken(); // eat the 'while'. + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "while"); + SkipUntil(tok::semi); + return true; + } + + // C99 6.8.5p5 - In C99, the while statement is a block. This is not + // the case for C90. Start the loop scope. + if (getLang().C99) + EnterScope(Scope::BreakScope | Scope::ContinueScope | Scope::DeclScope); + else + EnterScope(Scope::BreakScope | Scope::ContinueScope); + + // Parse the condition. + ExprResult Cond = ParseSimpleParenExpression(); + + // C99 6.8.5p5 - In C99, the body of the if statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do this + // if the body isn't a compound statement to avoid push/pop in common cases. 
+ bool NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + // Read the body statement. + StmtResult Body = ParseStatement(); + + // Pop the body scope if needed. + if (NeedsInnerScope) ExitScope(); + + ExitScope(); + + if (Cond.isInvalid || Body.isInvalid) return true; + + return Actions.ActOnWhileStmt(WhileLoc, Cond.Val, Body.Val); +} + +/// ParseDoStatement +/// do-statement: [C99 6.8.5.2] +/// 'do' statement 'while' '(' expression ')' ';' +/// Note: this lets the caller parse the end ';'. +Parser::StmtResult Parser::ParseDoStatement() { + assert(Tok.is(tok::kw_do) && "Not a do stmt!"); + SourceLocation DoLoc = ConsumeToken(); // eat the 'do'. + + // C99 6.8.5p5 - In C99, the do statement is a block. This is not + // the case for C90. Start the loop scope. + if (getLang().C99) + EnterScope(Scope::BreakScope | Scope::ContinueScope | Scope::DeclScope); + else + EnterScope(Scope::BreakScope | Scope::ContinueScope); + + // C99 6.8.5p5 - In C99, the body of the if statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do this + // if the body isn't a compound statement to avoid push/pop in common cases. + bool NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + // Read the body statement. + StmtResult Body = ParseStatement(); + + // Pop the body scope if needed. + if (NeedsInnerScope) ExitScope(); + + if (Tok.isNot(tok::kw_while)) { + ExitScope(); + Diag(Tok, diag::err_expected_while); + Diag(DoLoc, diag::err_matching, "do"); + SkipUntil(tok::semi); + return true; + } + SourceLocation WhileLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + ExitScope(); + Diag(Tok, diag::err_expected_lparen_after, "do/while"); + SkipUntil(tok::semi); + return true; + } + + // Parse the condition. 
+ ExprResult Cond = ParseSimpleParenExpression(); + + ExitScope(); + + if (Cond.isInvalid || Body.isInvalid) return true; + + return Actions.ActOnDoStmt(DoLoc, Body.Val, WhileLoc, Cond.Val); +} + +/// ParseForStatement +/// for-statement: [C99 6.8.5.3] +/// 'for' '(' expr[opt] ';' expr[opt] ';' expr[opt] ')' statement +/// 'for' '(' declaration expr[opt] ';' expr[opt] ')' statement +/// [OBJC2] 'for' '(' declaration 'in' expr ')' statement +/// [OBJC2] 'for' '(' expr 'in' expr ')' statement +Parser::StmtResult Parser::ParseForStatement() { + assert(Tok.is(tok::kw_for) && "Not a for stmt!"); + SourceLocation ForLoc = ConsumeToken(); // eat the 'for'. + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "for"); + SkipUntil(tok::semi); + return true; + } + + // C99 6.8.5p5 - In C99, the for statement is a block. This is not + // the case for C90. Start the loop scope. + if (getLang().C99) + EnterScope(Scope::BreakScope | Scope::ContinueScope | Scope::DeclScope); + else + EnterScope(Scope::BreakScope | Scope::ContinueScope); + + SourceLocation LParenLoc = ConsumeParen(); + ExprResult Value; + + StmtTy *FirstPart = 0; + ExprTy *SecondPart = 0; + StmtTy *ThirdPart = 0; + bool ForEach = false; + + // Parse the first part of the for specifier. + if (Tok.is(tok::semi)) { // for (; + // no first part, eat the ';'. + ConsumeToken(); + } else if (isDeclarationSpecifier()) { // for (int X = 4; + // Parse declaration, which eats the ';'. + if (!getLang().C99) // Use of C99-style for loops in C90 mode? + Diag(Tok, diag::ext_c99_variable_decl_in_for_loop); + + SourceLocation DeclStart = Tok.getLocation(); + DeclTy *aBlockVarDecl = ParseDeclaration(Declarator::ForContext); + // FIXME: Pass in the right location for the end of the declstmt. + StmtResult stmtResult = Actions.ActOnDeclStmt(aBlockVarDecl, DeclStart, + DeclStart); + FirstPart = stmtResult.isInvalid ? 
0 : stmtResult.Val; + if ((ForEach = isTokIdentifier_in())) { + ConsumeToken(); // consume 'in' + Value = ParseExpression(); + if (!Value.isInvalid) + SecondPart = Value.Val; + } + } else { + Value = ParseExpression(); + + // Turn the expression into a stmt. + if (!Value.isInvalid) { + StmtResult R = Actions.ActOnExprStmt(Value.Val); + if (!R.isInvalid) + FirstPart = R.Val; + } + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } + else if ((ForEach = isTokIdentifier_in())) { + ConsumeToken(); // consume 'in' + Value = ParseExpression(); + if (!Value.isInvalid) + SecondPart = Value.Val; + } + else { + if (!Value.isInvalid) Diag(Tok, diag::err_expected_semi_for); + SkipUntil(tok::semi); + } + } + if (!ForEach) { + // Parse the second part of the for specifier. + if (Tok.is(tok::semi)) { // for (...;; + // no second part. + Value = ExprResult(); + } else { + Value = ParseExpression(); + if (!Value.isInvalid) + SecondPart = Value.Val; + } + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } else { + if (!Value.isInvalid) Diag(Tok, diag::err_expected_semi_for); + SkipUntil(tok::semi); + } + + // Parse the third part of the for specifier. + if (Tok.is(tok::r_paren)) { // for (...;...;) + // no third part. + Value = ExprResult(); + } else { + Value = ParseExpression(); + if (!Value.isInvalid) { + // Turn the expression into a stmt. + StmtResult R = Actions.ActOnExprStmt(Value.Val); + if (!R.isInvalid) + ThirdPart = R.Val; + } + } + } + // Match the ')'. + SourceLocation RParenLoc = MatchRHSPunctuation(tok::r_paren, LParenLoc); + + // C99 6.8.5p5 - In C99, the body of the if statement is a scope, even if + // there is no compound stmt. C90 does not have this clause. We only do this + // if the body isn't a compound statement to avoid push/pop in common cases. + bool NeedsInnerScope = getLang().C99 && Tok.isNot(tok::l_brace); + if (NeedsInnerScope) EnterScope(Scope::DeclScope); + + // Read the body statement. 
+ StmtResult Body = ParseStatement(); + + // Pop the body scope if needed. + if (NeedsInnerScope) ExitScope(); + + // Leave the for-scope. + ExitScope(); + + if (Body.isInvalid) + return Body; + + if (!ForEach) + return Actions.ActOnForStmt(ForLoc, LParenLoc, FirstPart, + SecondPart, ThirdPart, RParenLoc, Body.Val); + else + return Actions.ActOnObjCForCollectionStmt(ForLoc, LParenLoc, FirstPart, + SecondPart, RParenLoc, Body.Val); +} + +/// ParseGotoStatement +/// jump-statement: +/// 'goto' identifier ';' +/// [GNU] 'goto' '*' expression ';' +/// +/// Note: this lets the caller parse the end ';'. +/// +Parser::StmtResult Parser::ParseGotoStatement() { + assert(Tok.is(tok::kw_goto) && "Not a goto stmt!"); + SourceLocation GotoLoc = ConsumeToken(); // eat the 'goto'. + + StmtResult Res; + if (Tok.is(tok::identifier)) { + Res = Actions.ActOnGotoStmt(GotoLoc, Tok.getLocation(), + Tok.getIdentifierInfo()); + ConsumeToken(); + } else if (Tok.is(tok::star) && !getLang().NoExtensions) { + // GNU indirect goto extension. + Diag(Tok, diag::ext_gnu_indirect_goto); + SourceLocation StarLoc = ConsumeToken(); + ExprResult R = ParseExpression(); + if (R.isInvalid) { // Skip to the semicolon, but don't consume it. + SkipUntil(tok::semi, false, true); + return true; + } + Res = Actions.ActOnIndirectGotoStmt(GotoLoc, StarLoc, R.Val); + } else { + Diag(Tok, diag::err_expected_ident); + return true; + } + + return Res; +} + +/// ParseContinueStatement +/// jump-statement: +/// 'continue' ';' +/// +/// Note: this lets the caller parse the end ';'. +/// +Parser::StmtResult Parser::ParseContinueStatement() { + SourceLocation ContinueLoc = ConsumeToken(); // eat the 'continue'. + return Actions.ActOnContinueStmt(ContinueLoc, CurScope); +} + +/// ParseBreakStatement +/// jump-statement: +/// 'break' ';' +/// +/// Note: this lets the caller parse the end ';'. +/// +Parser::StmtResult Parser::ParseBreakStatement() { + SourceLocation BreakLoc = ConsumeToken(); // eat the 'break'. 
+ return Actions.ActOnBreakStmt(BreakLoc, CurScope); +} + +/// ParseReturnStatement +/// jump-statement: +/// 'return' expression[opt] ';' +Parser::StmtResult Parser::ParseReturnStatement() { + assert(Tok.is(tok::kw_return) && "Not a return stmt!"); + SourceLocation ReturnLoc = ConsumeToken(); // eat the 'return'. + + ExprResult R(0); + if (Tok.isNot(tok::semi)) { + R = ParseExpression(); + if (R.isInvalid) { // Skip to the semicolon, but don't consume it. + SkipUntil(tok::semi, false, true); + return true; + } + } + return Actions.ActOnReturnStmt(ReturnLoc, R.Val); +} + +/// FuzzyParseMicrosoftAsmStatement. When -fms-extensions is enabled, this +/// routine is called to skip/ignore tokens that comprise the MS asm statement. +Parser::StmtResult Parser::FuzzyParseMicrosoftAsmStatement() { + if (Tok.is(tok::l_brace)) { + unsigned short savedBraceCount = BraceCount; + do { + ConsumeAnyToken(); + } while (BraceCount > savedBraceCount && Tok.isNot(tok::eof)); + } else { + // From the MS website: If used without braces, the __asm keyword means + // that the rest of the line is an assembly-language statement. + SourceManager &SrcMgr = PP.getSourceManager(); + SourceLocation TokLoc = Tok.getLocation(); + unsigned lineNo = SrcMgr.getLogicalLineNumber(TokLoc); + do { + ConsumeAnyToken(); + TokLoc = Tok.getLocation(); + } while ((SrcMgr.getLogicalLineNumber(TokLoc) == lineNo) && + Tok.isNot(tok::r_brace) && Tok.isNot(tok::semi) && + Tok.isNot(tok::eof)); + } + return false; +} + +/// ParseAsmStatement - Parse a GNU extended asm statement. 
+/// asm-statement: +/// gnu-asm-statement +/// ms-asm-statement +/// +/// [GNU] gnu-asm-statement: +/// 'asm' type-qualifier[opt] '(' asm-argument ')' ';' +/// +/// [GNU] asm-argument: +/// asm-string-literal +/// asm-string-literal ':' asm-operands[opt] +/// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] +/// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] +/// ':' asm-clobbers +/// +/// [GNU] asm-clobbers: +/// asm-string-literal +/// asm-clobbers ',' asm-string-literal +/// +/// [MS] ms-asm-statement: +/// '__asm' assembly-instruction ';'[opt] +/// '__asm' '{' assembly-instruction-list '}' ';'[opt] +/// +/// [MS] assembly-instruction-list: +/// assembly-instruction ';'[opt] +/// assembly-instruction-list ';' assembly-instruction ';'[opt] +/// +Parser::StmtResult Parser::ParseAsmStatement(bool &msAsm) { + assert(Tok.is(tok::kw_asm) && "Not an asm stmt"); + SourceLocation AsmLoc = ConsumeToken(); + + if (getLang().Microsoft && Tok.isNot(tok::l_paren) && !isTypeQualifier()) { + msAsm = true; + return FuzzyParseMicrosoftAsmStatement(); + } + DeclSpec DS; + SourceLocation Loc = Tok.getLocation(); + ParseTypeQualifierListOpt(DS); + + // GNU asms accept, but warn, about type-qualifiers other than volatile. + if (DS.getTypeQualifiers() & DeclSpec::TQ_const) + Diag(Loc, diag::w_asm_qualifier_ignored, "const"); + if (DS.getTypeQualifiers() & DeclSpec::TQ_restrict) + Diag(Loc, diag::w_asm_qualifier_ignored, "restrict"); + + // Remember if this was a volatile asm. 
+ bool isVolatile = DS.getTypeQualifiers() & DeclSpec::TQ_volatile; + bool isSimple = false; + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after, "asm"); + SkipUntil(tok::r_paren); + return true; + } + Loc = ConsumeParen(); + + ExprResult AsmString = ParseAsmStringLiteral(); + if (AsmString.isInvalid) + return true; + + llvm::SmallVector<std::string, 4> Names; + llvm::SmallVector<ExprTy*, 4> Constraints; + llvm::SmallVector<ExprTy*, 4> Exprs; + llvm::SmallVector<ExprTy*, 4> Clobbers; + + unsigned NumInputs = 0, NumOutputs = 0; + + SourceLocation RParenLoc; + if (Tok.is(tok::r_paren)) { + // We have a simple asm expression + isSimple = true; + + RParenLoc = ConsumeParen(); + } else { + // Parse Outputs, if present. + if (ParseAsmOperandsOpt(Names, Constraints, Exprs)) + return true; + + NumOutputs = Names.size(); + + // Parse Inputs, if present. + if (ParseAsmOperandsOpt(Names, Constraints, Exprs)) + return true; + + assert(Names.size() == Constraints.size() && + Constraints.size() == Exprs.size() + && "Input operand size mismatch!"); + + NumInputs = Names.size() - NumOutputs; + + // Parse the clobbers, if present. + if (Tok.is(tok::colon)) { + ConsumeToken(); + + // Parse the asm-string list for clobbers. + while (1) { + ExprResult Clobber = ParseAsmStringLiteral(); + + if (Clobber.isInvalid) + break; + + Clobbers.push_back(Clobber.Val); + + if (Tok.isNot(tok::comma)) break; + ConsumeToken(); + } + } + + RParenLoc = MatchRHSPunctuation(tok::r_paren, Loc); + } + + return Actions.ActOnAsmStmt(AsmLoc, isSimple, isVolatile, + NumOutputs, NumInputs, + &Names[0], &Constraints[0], &Exprs[0], + AsmString.Val, + Clobbers.size(), &Clobbers[0], + RParenLoc); +} + +/// ParseAsmOperands - Parse the asm-operands production as used by +/// asm-statement. We also parse a leading ':' token. If the leading colon is +/// not present, we do not parse anything. 
+///
+/// [GNU] asm-operands:
+///         asm-operand
+///         asm-operands ',' asm-operand
+///
+/// [GNU] asm-operand:
+///         asm-string-literal '(' expression ')'
+///         '[' identifier ']' asm-string-literal '(' expression ')'
+///
+/// Returns true if a parse error was diagnosed, false otherwise.  For every
+/// operand that parses successfully one entry is appended to each of Names
+/// (empty string when no '[' identifier ']' was given), Constraints and Exprs.
+///
+bool Parser::ParseAsmOperandsOpt(llvm::SmallVectorImpl<std::string> &Names,
+                                 llvm::SmallVectorImpl<ExprTy*> &Constraints,
+                                 llvm::SmallVectorImpl<ExprTy*> &Exprs) {
+  // Only do anything if this operand is present.
+  if (Tok.isNot(tok::colon)) return false;
+  ConsumeToken();
+
+  // 'asm-operands' isn't present?
+  if (!isTokenStringLiteral() && Tok.isNot(tok::l_square))
+    return false;
+
+  while (1) {
+    // Read the [id] if present.
+    if (Tok.is(tok::l_square)) {
+      SourceLocation Loc = ConsumeBracket();
+
+      if (Tok.isNot(tok::identifier)) {
+        Diag(Tok, diag::err_expected_ident);
+        SkipUntil(tok::r_paren);
+        return true;
+      }
+
+      IdentifierInfo *II = Tok.getIdentifierInfo();
+      ConsumeToken();
+
+      Names.push_back(std::string(II->getName(), II->getLength()));
+      MatchRHSPunctuation(tok::r_square, Loc);
+    } else
+      Names.push_back(std::string());
+
+    ExprResult Constraint = ParseAsmStringLiteral();
+    if (Constraint.isInvalid) {
+      SkipUntil(tok::r_paren);
+      return true;
+    }
+    Constraints.push_back(Constraint.Val);
+
+    if (Tok.isNot(tok::l_paren)) {
+      Diag(Tok, diag::err_expected_lparen_after, "asm operand");
+      SkipUntil(tok::r_paren);
+      return true;
+    }
+
+    // Read the parenthesized expression.
+    ExprResult Res = ParseSimpleParenExpression();
+    if (Res.isInvalid) {
+      SkipUntil(tok::r_paren);
+      return true;
+    }
+    Exprs.push_back(Res.Val);
+    // Eat the comma and continue parsing if it exists.
+    if (Tok.isNot(tok::comma)) return false;
+    ConsumeToken();
+  }
+
+  // Not reached: the loop above only exits through one of its returns.
+  return true;
+}
+
+/// ParseFunctionStatementBody - Parse the compound-statement body of the
+/// function definition Decl, leave the function body scope, and hand the
+/// result to ActOnFinishFunctionBody.  L and R are only used as the brace
+/// locations of the placeholder compound statement built when the body fails
+/// to parse.
+Parser::DeclTy *Parser::ParseFunctionStatementBody(DeclTy *Decl,
+                                           SourceLocation L, SourceLocation R) {
+  // Do not enter a scope for the brace, as the arguments are in the same scope
+  // (the function body) as the body itself.  Instead, just read the statement
+  // list and put it into a CompoundStmt for safe keeping.
+  StmtResult FnBody = ParseCompoundStatementBody();
+
+  // If the function body could not be parsed, make a bogus compoundstmt.
+  if (FnBody.isInvalid)
+    FnBody = Actions.ActOnCompoundStmt(L, R, 0, 0, false);
+
+  // Leave the function body scope.
+  ExitScope();
+
+  return Actions.ActOnFinishFunctionBody(Decl, FnBody.Val);
+}
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
new file mode 100644
index 00000000000..703144b3c44
--- /dev/null
+++ b/clang/lib/Parse/Parser.cpp
@@ -0,0 +1,647 @@
+//===--- Parser.cpp - C Language Family Parser ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Parser interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Parse/Parser.h"
+#include "clang/Parse/DeclSpec.h"
+#include "clang/Parse/Scope.h"
+using namespace clang;
+
+/// The parser starts with an EOF token, no current scope, an empty scope
+/// cache and all bracket-nesting counters at zero.
+Parser::Parser(Preprocessor &pp, Action &actions)
+  : PP(pp), Actions(actions), Diags(PP.getDiagnostics()) {
+  Tok.setKind(tok::eof);
+  CurScope = 0;
+  NumCachedScopes = 0;
+  ParenCount = BracketCount = BraceCount = 0;
+  ObjCImpDecl = 0;
+}
+
+/// Out-of-line virtual destructor to provide home for Action class.
+Action::~Action() {}
+
+
+/// Diag - Report diagnostic DiagID at Loc, passing Msg as its single string
+/// argument.
+void Parser::Diag(SourceLocation Loc, unsigned DiagID,
+                  const std::string &Msg) {
+  Diags.Report(FullSourceLoc(Loc,PP.getSourceManager()), DiagID, &Msg, 1);
+}
+
+/// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'),
+/// this helper function matches and consumes the specified RHS token if
+/// present, returning the result of ConsumeAnyToken().  If not present, it
+/// emits the "expected" diagnostic selected by RHSTok, then a second
+/// diagnostic at LHSLoc naming the unmatched LHS token (the name is derived
+/// from RHSTok), skips ahead with SkipUntil(RHSTok), and returns the location
+/// of the token at which the mismatch was detected.
+SourceLocation Parser::MatchRHSPunctuation(tok::TokenKind RHSTok,
+                                           SourceLocation LHSLoc) {
+
+  if (Tok.is(RHSTok))
+    return ConsumeAnyToken();
+
+  SourceLocation R = Tok.getLocation();
+  const char *LHSName = "unknown";
+  diag::kind DID = diag::err_parse_error;
+  switch (RHSTok) {
+  default: break;
+  case tok::r_paren : LHSName = "("; DID = diag::err_expected_rparen; break;
+  case tok::r_brace : LHSName = "{"; DID = diag::err_expected_rbrace; break;
+  case tok::r_square: LHSName = "["; DID = diag::err_expected_rsquare; break;
+  case tok::greater:  LHSName = "<"; DID = diag::err_expected_greater; break;
+  }
+  Diag(Tok, DID);
+  Diag(LHSLoc, diag::err_matching, LHSName);
+  SkipUntil(RHSTok);
+  return R;
+}
+
+/// ExpectAndConsume - The parser expects that 'ExpectedTok' is next in the
+/// input.  If so, it is consumed and false is returned.
+///
+/// If the input is malformed, this emits the specified diagnostic.  Next, if
+/// SkipToTok is specified, it calls SkipUntil(SkipToTok).  Finally, true is
+/// returned.
+bool Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID,
+                              const char *Msg, tok::TokenKind SkipToTok) {
+  if (Tok.is(ExpectedTok)) {
+    ConsumeAnyToken();
+    return false;
+  }
+
+  Diag(Tok, DiagID, Msg);
+  // tok::unknown means "no recovery token was requested".
+  if (SkipToTok != tok::unknown)
+    SkipUntil(SkipToTok);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Error recovery.
+//===----------------------------------------------------------------------===//
+
+/// SkipUntil - Read tokens until we get to the specified token, then consume
+/// it (unless DontConsume is true).  Because we cannot guarantee that the
+/// token will ever occur, this skips to the next token, or to some likely
+/// good stopping point.  If StopAtSemi is true, skipping will stop at a ';'
+/// character.
+///
+/// If SkipUntil finds the specified token, it returns true, otherwise it
+/// returns false.
+bool Parser::SkipUntil(const tok::TokenKind *Toks, unsigned NumToks,
+                       bool StopAtSemi, bool DontConsume) {
+  // We always want this function to skip at least one token if the first token
+  // isn't one of Toks and if not at EOF.  This flag stays true until the first
+  // token has been skipped, which forces an unbalanced closing bracket seen
+  // first to be consumed instead of ending the skip.
+  bool isFirstTokenSkipped = true;
+  while (1) {
+    // If we found one of the tokens, stop and return true.
+    for (unsigned i = 0; i != NumToks; ++i) {
+      if (Tok.is(Toks[i])) {
+        if (DontConsume) {
+          // Noop, don't consume the token.
+        } else {
+          ConsumeAnyToken();
+        }
+        return true;
+      }
+    }
+
+    switch (Tok.getKind()) {
+    case tok::eof:
+      // Ran out of tokens.
+      return false;
+
+    case tok::l_paren:
+      // Recursively skip properly-nested parens.
+      ConsumeParen();
+      SkipUntil(tok::r_paren, false);
+      break;
+    case tok::l_square:
+      // Recursively skip properly-nested square brackets.
+      ConsumeBracket();
+      SkipUntil(tok::r_square, false);
+      break;
+    case tok::l_brace:
+      // Recursively skip properly-nested braces.
+      ConsumeBrace();
+      SkipUntil(tok::r_brace, false);
+      break;
+
+    // Okay, we found a ']' or '}' or ')', which we think should be balanced.
+    // Since the user wasn't looking for this token (if they were, it would
+    // already be handled), this isn't balanced.  If there is a LHS token at a
+    // higher level, we will assume that this matches the unbalanced token
+    // and return it.  Otherwise, this is a spurious RHS token, which we skip.
+    case tok::r_paren:
+      if (ParenCount && !isFirstTokenSkipped)
+        return false;  // Matches something.
+      ConsumeParen();
+      break;
+    case tok::r_square:
+      if (BracketCount && !isFirstTokenSkipped)
+        return false;  // Matches something.
+      ConsumeBracket();
+      break;
+    case tok::r_brace:
+      if (BraceCount && !isFirstTokenSkipped)
+        return false;  // Matches something.
+      ConsumeBrace();
+      break;
+
+    case tok::string_literal:
+    case tok::wide_string_literal:
+      ConsumeStringToken();
+      break;
+    case tok::semi:
+      if (StopAtSemi)
+        return false;
+      // FALL THROUGH.
+    default:
+      // Skip this token.
+      ConsumeToken();
+      break;
+    }
+    isFirstTokenSkipped = false;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Scope manipulation
+//===----------------------------------------------------------------------===//
+
+/// EnterScope - Start a new scope.  A Scope object from the scope cache is
+/// re-initialized and reused when one is available; otherwise a new one is
+/// allocated.
+void Parser::EnterScope(unsigned ScopeFlags) {
+  if (NumCachedScopes) {
+    Scope *N = ScopeCache[--NumCachedScopes];
+    N->Init(CurScope, ScopeFlags);
+    CurScope = N;
+  } else {
+    CurScope = new Scope(CurScope, ScopeFlags);
+  }
+}
+
+/// ExitScope - Pop a scope off the scope stack.  The popped Scope object is
+/// kept in the scope cache for reuse unless the cache already holds
+/// ScopeCacheSize entries, in which case it is deleted.
+void Parser::ExitScope() {
+  assert(CurScope && "Scope imbalance!");
+
+  // Inform the actions module that this scope is going away if there are any
+  // decls in it.
+  if (!CurScope->decl_empty())
+    Actions.ActOnPopScope(Tok.getLocation(), CurScope);
+
+  Scope *OldScope = CurScope;
+  CurScope = OldScope->getParent();
+
+  if (NumCachedScopes == ScopeCacheSize)
+    delete OldScope;
+  else
+    ScopeCache[NumCachedScopes++] = OldScope;
+}
+
+
+
+
+//===----------------------------------------------------------------------===//
+// C99 6.9: External Definitions.
+//===----------------------------------------------------------------------===//
+
+Parser::~Parser() {
+  // If we still have scopes active, delete the scope tree.
+  // NOTE(review): only CurScope itself is deleted here; whether that also
+  // releases its parent scopes depends on Scope's destructor - confirm.
+  delete CurScope;
+
+  // Free the scope cache.
+  for (unsigned i = 0, e = NumCachedScopes; i != e; ++i)
+    delete ScopeCache[i];
+}
+
+/// Initialize - Warm up the parser.
+///
+void Parser::Initialize() {
+  // Prime the lexer look-ahead.
+  ConsumeToken();
+
+  // Create the translation unit scope.  Install it as the current scope.
+  assert(CurScope == 0 && "A scope is already active?");
+  EnterScope(Scope::DeclScope);
+  Actions.ActOnTranslationUnitScope(Tok.getLocation(), CurScope);
+
+  if (Tok.is(tok::eof) &&
+      !getLang().CPlusPlus)  // Empty source file is an extension in C
+    Diag(Tok, diag::ext_empty_source_file);
+
+  // Initialization for Objective-C context sensitive keywords recognition.
+  // Referenced in Parser::ParseObjCTypeQualifierList.
+  if (getLang().ObjC1) {
+    ObjCTypeQuals[objc_in] = &PP.getIdentifierTable().get("in");
+    ObjCTypeQuals[objc_out] = &PP.getIdentifierTable().get("out");
+    ObjCTypeQuals[objc_inout] = &PP.getIdentifierTable().get("inout");
+    ObjCTypeQuals[objc_oneway] = &PP.getIdentifierTable().get("oneway");
+    ObjCTypeQuals[objc_bycopy] = &PP.getIdentifierTable().get("bycopy");
+    ObjCTypeQuals[objc_byref] = &PP.getIdentifierTable().get("byref");
+  }
+  // Property attribute names and the 'in' keyword of for-in collection
+  // statements are only looked up when ObjC2 is enabled.
+  if (getLang().ObjC2) {
+    ObjCPropertyAttrs[objc_readonly] = &PP.getIdentifierTable().get("readonly");
+    ObjCPropertyAttrs[objc_getter] = &PP.getIdentifierTable().get("getter");
+    ObjCPropertyAttrs[objc_setter] = &PP.getIdentifierTable().get("setter");
+    ObjCPropertyAttrs[objc_assign] = &PP.getIdentifierTable().get("assign");
+    ObjCPropertyAttrs[objc_readwrite] =
+      &PP.getIdentifierTable().get("readwrite");
+    ObjCPropertyAttrs[objc_retain] = &PP.getIdentifierTable().get("retain");
+    ObjCPropertyAttrs[objc_copy] = &PP.getIdentifierTable().get("copy");
+    ObjCPropertyAttrs[objc_nonatomic] =
+      &PP.getIdentifierTable().get("nonatomic");
+    ObjCForCollectionInKW = &PP.getIdentifierTable().get("in");
+  }
+}
+
+/// ParseTopLevelDecl - Parse one top-level declaration, return whatever the
+/// action tells us to.  This returns true if the EOF was encountered.
+/// Result is always reset to null first, so it is null when true is returned.
+bool Parser::ParseTopLevelDecl(DeclTy*& Result) {
+  Result = 0;
+  if (Tok.is(tok::eof)) return true;
+
+  Result = ParseExternalDeclaration();
+  return false;
+}
+
+/// Finalize - Shut down the parser.
+///
+void Parser::Finalize() {
+  // Pops the translation unit scope; no scope may remain active afterwards.
+  ExitScope();
+  assert(CurScope == 0 && "Scope imbalance!");
+}
+
+/// ParseTranslationUnit:
+///       translation-unit: [C99 6.9]
+///         external-declaration
+///         translation-unit external-declaration
+void Parser::ParseTranslationUnit() {
+  Initialize();
+
+  DeclTy *Res;
+  while (!ParseTopLevelDecl(Res))
+    /*parse them all*/;
+
+  Finalize();
+}
+
+/// ParseExternalDeclaration:
+///       external-declaration: [C99 6.9]
+///         function-definition
+///         declaration
+/// [EXT]   ';'
+/// [GNU]   asm-definition
+/// [GNU]   __extension__ external-declaration
+/// [OBJC]  objc-class-definition
+/// [OBJC]  objc-class-declaration
+/// [OBJC]  objc-alias-declaration
+/// [OBJC]  objc-protocol-definition
+/// [OBJC]  objc-method-definition
+/// [OBJC]  @end
+///
+/// [GNU] asm-definition:
+///         simple-asm-expr ';'
+///
+/// Returns the declaration produced by the actions module, or null when
+/// nothing was produced (top-level ';', or a diagnosed parse error).
+///
+Parser::DeclTy *Parser::ParseExternalDeclaration() {
+  switch (Tok.getKind()) {
+  case tok::semi:
+    Diag(Tok, diag::ext_top_level_semi);
+    ConsumeToken();
+    // TODO: Invoke action for top-level semicolon.
+    return 0;
+  case tok::kw___extension__: {
+    ConsumeToken();
+    // FIXME: Disable extension warnings.
+    DeclTy *RV = ParseExternalDeclaration();
+    // FIXME: Restore extension warnings.
+    return RV;
+  }
+  case tok::kw_asm: {
+    ExprResult Result = ParseSimpleAsm();
+
+    ExpectAndConsume(tok::semi, diag::err_expected_semi_after,
+                     "top-level asm block");
+
+    if (!Result.isInvalid)
+      return Actions.ActOnFileScopeAsmDecl(Tok.getLocation(), Result.Val);
+
+    // The asm string was invalid and has already been diagnosed.  Return here
+    // instead of falling into the '@' case below, which would try to parse an
+    // Objective-C directive from whatever token happens to come next.
+    return 0;
+  }
+  case tok::at:
+    // @ is not a legal token unless objc is enabled, no need to check.
+    return ParseObjCAtDirectives();
+  case tok::minus:
+  case tok::plus:
+    if (getLang().ObjC1)
+      return ParseObjCMethodDefinition();
+    else {
+      Diag(Tok, diag::err_expected_external_declaration);
+      ConsumeToken();
+    }
+    return 0;
+  case tok::kw_namespace:
+  case tok::kw_typedef:
+    // A function definition cannot start with these keywords.
+    return ParseDeclaration(Declarator::FileContext);
+  default:
+    // We can't tell whether this is a function-definition or declaration yet.
+    return ParseDeclarationOrFunctionDefinition();
+  }
+}
+
+/// ParseDeclarationOrFunctionDefinition - Parse either a function-definition or
+/// a declaration.  We can't tell which we have until we read up to the
+/// compound-statement in function-definition.
+///
+///       function-definition: [C99 6.9.1]
+///         declaration-specifiers[opt] declarator declaration-list[opt]
+///                 compound-statement
+///       declaration: [C99 6.7]
+///         declaration-specifiers init-declarator-list[opt] ';'
+/// [!C99]  init-declarator-list ';'                   [TODO: warn in c99 mode]
+/// [OMP]   threadprivate-directive                              [TODO]
+///
+Parser::DeclTy *Parser::ParseDeclarationOrFunctionDefinition() {
+  // Parse the common declaration-specifiers piece.
+  DeclSpec DS;
+  ParseDeclarationSpecifiers(DS);
+
+  // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
+  // declaration-specifiers init-declarator-list[opt] ';'
+  if (Tok.is(tok::semi)) {
+    ConsumeToken();
+    return Actions.ParsedFreeStandingDeclSpec(CurScope, DS);
+  }
+
+  // ObjC2 allows prefix attributes on class interfaces.
+  if (getLang().ObjC2 && Tok.is(tok::at)) {
+    SourceLocation AtLoc = ConsumeToken(); // the "@"
+    if (!Tok.isObjCAtKeyword(tok::objc_interface)) {
+      Diag(Tok, diag::err_objc_expected_property_attr);//FIXME:better diagnostic
+      SkipUntil(tok::semi); // FIXME: better skip?
+      return 0;
+    }
+    const char *PrevSpec = 0;
+    if (DS.SetTypeSpecType(DeclSpec::TST_unspecified, AtLoc, PrevSpec))
+      Diag(AtLoc, diag::err_invalid_decl_spec_combination, PrevSpec);
+    return ParseObjCAtInterfaceDeclaration(AtLoc, DS.getAttributes());
+  }
+
+  // If the declspec consisted only of 'extern' and we have a string
+  // literal following it, this must be a C++ linkage specifier like
+  // 'extern "C"'.
+  if (Tok.is(tok::string_literal) && getLang().CPlusPlus &&
+      DS.getStorageClassSpec() == DeclSpec::SCS_extern &&
+      DS.getParsedSpecifiers() == DeclSpec::PQ_StorageClassSpecifier)
+    return ParseLinkage(Declarator::FileContext);
+
+  // Parse the first declarator.
+  Declarator DeclaratorInfo(DS, Declarator::FileContext);
+  ParseDeclarator(DeclaratorInfo);
+  // Error parsing the declarator?
+  if (DeclaratorInfo.getIdentifier() == 0) {
+    // If so, skip until the semi-colon or a }.
+    SkipUntil(tok::r_brace, true);
+    if (Tok.is(tok::semi))
+      ConsumeToken();
+    return 0;
+  }
+
+  // If the declarator is the start of a function definition, handle it.
+  if (Tok.is(tok::equal) ||           // int X()=  -> not a function def
+      Tok.is(tok::comma) ||           // int X(),  -> not a function def
+      Tok.is(tok::semi)  ||           // int X();  -> not a function def
+      Tok.is(tok::kw_asm) ||          // int X() __asm__ -> not a function def
+      Tok.is(tok::kw___attribute)) {  // int X() __attr__ -> not a function def
+    // FALL THROUGH.
+  } else if (DeclaratorInfo.isFunctionDeclarator() &&
+             (Tok.is(tok::l_brace) ||             // int X() {}
+              isDeclarationSpecifier())) {        // int X(f) int f; {}
+    if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) {
+      Diag(Tok, diag::err_function_declared_typedef);
+
+      if (Tok.is(tok::l_brace)) {
+        // This recovery skips the entire function body. It would be nice
+        // to simply call ParseFunctionDefinition() below, however Sema
+        // assumes the declarator represents a function, not a typedef.
+        ConsumeBrace();
+        SkipUntil(tok::r_brace, true);
+      } else {
+        SkipUntil(tok::semi);
+      }
+      return 0;
+    }
+    return ParseFunctionDefinition(DeclaratorInfo);
+  } else {
+    if (DeclaratorInfo.isFunctionDeclarator())
+      Diag(Tok, diag::err_expected_fn_body);
+    else
+      Diag(Tok, diag::err_expected_after_declarator);
+    SkipUntil(tok::semi);
+    return 0;
+  }
+
+  // Parse the init-declarator-list for a normal declaration.
+  return ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo);
+}
+
+/// ParseFunctionDefinition - We parsed and verified that the specified
+/// Declarator is well formed.  If this is a K&R-style function, read the
+/// parameters declaration-list, then start the compound-statement.
+///
+///         declaration-specifiers[opt] declarator declaration-list[opt]
+///                 compound-statement                           [TODO]
+///
+/// Returns null if no '{' could be found after diagnosing the missing body.
+///
+Parser::DeclTy *Parser::ParseFunctionDefinition(Declarator &D) {
+  const DeclaratorChunk &FnTypeInfo = D.getTypeObject(0);
+  assert(FnTypeInfo.Kind == DeclaratorChunk::Function &&
+         "This isn't a function declarator!");
+  const DeclaratorChunk::FunctionTypeInfo &FTI = FnTypeInfo.Fun;
+
+  // If this declaration was formed with a K&R-style identifier list for the
+  // arguments, parse declarations for all of the args next.
+  // int foo(a,b) int a; float b; {}
+  if (!FTI.hasPrototype && FTI.NumArgs != 0)
+    ParseKNRParamDeclarations(D);
+
+  // We should have an opening brace now.
+  if (Tok.isNot(tok::l_brace)) {
+    Diag(Tok, diag::err_expected_fn_body);
+
+    // Skip over garbage, until we get to '{'.  Don't eat the '{'.
+    SkipUntil(tok::l_brace, true, true);
+
+    // If we didn't find the '{', bail out.
+    if (Tok.isNot(tok::l_brace))
+      return 0;
+  }
+
+  SourceLocation BraceLoc = Tok.getLocation();
+
+  // Enter a scope for the function body.
+  EnterScope(Scope::FnScope|Scope::DeclScope);
+
+  // Tell the actions module that we have entered a function definition with the
+  // specified Declarator for the function.
+  DeclTy *Res = Actions.ActOnStartOfFunctionDef(CurScope, D);
+
+  return ParseFunctionStatementBody(Res, BraceLoc, BraceLoc);
+}
+
+/// ParseKNRParamDeclarations - Parse 'declaration-list[opt]' which provides
+/// types for a function with a K&R-style identifier list for arguments.
+void Parser::ParseKNRParamDeclarations(Declarator &D) {
+  // We know that the top-level of this declarator is a function.
+  DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(0).Fun;
+
+  // Read all the argument declarations.
+  while (isDeclarationSpecifier()) {
+    SourceLocation DSStart = Tok.getLocation();
+
+    // Parse the common declaration-specifiers piece.
+    DeclSpec DS;
+    ParseDeclarationSpecifiers(DS);
+
+    // C99 6.9.1p6: 'each declaration in the declaration list shall have at
+    // least one declarator'.
+    // NOTE: GCC just makes this an ext-warn.  It's not clear what it does with
+    // the declarations though.  It's trivial to ignore them, really hard to do
+    // anything else with them.
+    if (Tok.is(tok::semi)) {
+      Diag(DSStart, diag::err_declaration_does_not_declare_param);
+      ConsumeToken();
+      continue;
+    }
+
+    // C99 6.9.1p6: Declarations shall contain no storage-class specifiers other
+    // than register.
+    if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified &&
+        DS.getStorageClassSpec() != DeclSpec::SCS_register) {
+      Diag(DS.getStorageClassSpecLoc(),
+           diag::err_invalid_storage_class_in_func_decl);
+      DS.ClearStorageClassSpecs();
+    }
+    if (DS.isThreadSpecified()) {
+      Diag(DS.getThreadSpecLoc(),
+           diag::err_invalid_storage_class_in_func_decl);
+      DS.ClearStorageClassSpecs();
+    }
+
+    // Parse the first declarator attached to this declspec.
+    Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext);
+    ParseDeclarator(ParmDeclarator);
+
+    // Handle the full declarator list.
+    while (1) {
+      DeclTy *AttrList;
+      // If attributes are present, parse them.
+      if (Tok.is(tok::kw___attribute))
+        // FIXME: attach attributes too.
+        AttrList = ParseAttributes();
+
+      // Ask the actions module to compute the type for this declarator.
+      Action::TypeResult TR =
+        Actions.ActOnParamDeclaratorType(CurScope, ParmDeclarator);
+
+      if (!TR.isInvalid &&
+          // A missing identifier has already been diagnosed.
+          ParmDeclarator.getIdentifier()) {
+
+        // Scan the argument list looking for the correct param to apply this
+        // type.
+        for (unsigned i = 0; ; ++i) {
+          // C99 6.9.1p6: those declarators shall declare only identifiers from
+          // the identifier list.
+          if (i == FTI.NumArgs) {
+            Diag(ParmDeclarator.getIdentifierLoc(), diag::err_no_matching_param,
+                 ParmDeclarator.getIdentifier()->getName());
+            break;
+          }
+
+          if (FTI.ArgInfo[i].Ident == ParmDeclarator.getIdentifier()) {
+            // Reject redefinitions of parameters.
+            if (FTI.ArgInfo[i].TypeInfo) {
+              Diag(ParmDeclarator.getIdentifierLoc(),
+                   diag::err_param_redefinition,
+                   ParmDeclarator.getIdentifier()->getName());
+            } else {
+              FTI.ArgInfo[i].TypeInfo = TR.Val;
+            }
+            break;
+          }
+        }
+      }
+
+      // If we don't have a comma, it is either the end of the list (a ';') or
+      // an error, bail out.
+      if (Tok.isNot(tok::comma))
+        break;
+
+      // Consume the comma.
+      ConsumeToken();
+
+      // Parse the next declarator.
+      ParmDeclarator.clear();
+      ParseDeclarator(ParmDeclarator);
+    }
+
+    if (Tok.is(tok::semi)) {
+      ConsumeToken();
+    } else {
+      Diag(Tok, diag::err_parse_error);
+      // Skip to end of block or statement
+      SkipUntil(tok::semi, true);
+      if (Tok.is(tok::semi))
+        ConsumeToken();
+    }
+  }
+
+  // The actions module must verify that all arguments were declared.
+}
+
+
+/// ParseAsmStringLiteral - This is just a normal string-literal, but is not
+/// allowed to be a wide string, and is not subject to character translation.
+///
+/// [GNU] asm-string-literal:
+///         string-literal
+///
+/// Returns the invalid result (after diagnosing) when the current token is
+/// not a string literal or the literal fails to parse.
+///
+Parser::ExprResult Parser::ParseAsmStringLiteral() {
+  if (!isTokenStringLiteral()) {
+    Diag(Tok, diag::err_expected_string_literal);
+    return true;
+  }
+
+  ExprResult Res = ParseStringLiteralExpression();
+  if (Res.isInvalid) return true;
+
+  // TODO: Diagnose: wide string literal in 'asm'
+
+  return Res;
+}
+
+/// ParseSimpleAsm
+///
+/// [GNU] simple-asm-expr:
+///         'asm' '(' asm-string-literal ')'
+///
+/// Must be called with the current token on 'asm'.  Returns the parsed asm
+/// string, or the invalid result once an error has been diagnosed.
+///
+Parser::ExprResult Parser::ParseSimpleAsm() {
+  assert(Tok.is(tok::kw_asm) && "Not an asm!");
+  SourceLocation Loc = ConsumeToken();
+
+  if (Tok.isNot(tok::l_paren)) {
+    Diag(Tok, diag::err_expected_lparen_after, "asm");
+    // Signal failure the same way ParseAsmStringLiteral does, so callers that
+    // test isInvalid do not act on a missing asm string.
+    return true;
+  }
+
+  // Remember where the '(' is: MatchRHSPunctuation reports this location as
+  // the unmatched left-hand token if the ')' is missing.
+  Loc = ConsumeParen();
+
+  ExprResult Result = ParseAsmStringLiteral();
+
+  MatchRHSPunctuation(tok::r_paren, Loc);
+
+  return Result;
+}
+
diff --git a/clang/lib/Rewrite/Makefile b/clang/lib/Rewrite/Makefile
new file mode 100644
index 00000000000..3c0b5a57274
--- /dev/null
+++ b/clang/lib/Rewrite/Makefile
@@ -0,0 +1,22 @@
+##===- clang/lib/Rewrite/Makefile --------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This implements code transformation / rewriting facilities.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME := clangRewrite
+BUILD_ARCHIVE = 1
+CXXFLAGS = -fno-rtti
+
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include
+
+include $(LEVEL)/Makefile.common
+
diff --git a/clang/lib/Rewrite/Rewriter.cpp b/clang/lib/Rewrite/Rewriter.cpp
new file mode 100644
index 00000000000..440d1d39fd5
--- /dev/null
+++ b/clang/lib/Rewrite/Rewriter.cpp
@@ -0,0 +1,258 @@
+//===--- Rewriter.cpp - Code rewriting interface --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the Rewriter class, which is used for code
+//  transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Rewrite/Rewriter.h"
+#include "clang/AST/Stmt.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Basic/SourceManager.h"
+#include <sstream>
+using namespace clang;
+
+/// getMappedOffset - Given an offset into the original SourceBuffer that this
+/// RewriteBuffer is based on, map it into the offset space of the
+/// RewriteBuffer.  If AfterInserts is true, a delta recorded exactly at
+/// OrigOffset is applied as well.
+unsigned RewriteBuffer::getMappedOffset(unsigned OrigOffset,
+                                        bool AfterInserts) const {
+  unsigned ResultOffset = OrigOffset;
+  unsigned DeltaIdx = 0;
+
+  // Move past any deltas that are relevant.
+  // FIXME: binary search.
+  for (; DeltaIdx != Deltas.size() &&
+       Deltas[DeltaIdx].FileLoc < OrigOffset; ++DeltaIdx)
+    ResultOffset += Deltas[DeltaIdx].Delta;
+
+  if (AfterInserts && DeltaIdx != Deltas.size() &&
+      OrigOffset == Deltas[DeltaIdx].FileLoc)
+    ResultOffset += Deltas[DeltaIdx].Delta;
+  return ResultOffset;
+}
+
+/// AddDelta - When a change is made that shifts around the text buffer, this
+/// method is used to record that info.
+void RewriteBuffer::AddDelta(unsigned OrigOffset, int Change) {
+  assert(Change != 0 && "Not changing anything");
+  unsigned DeltaIdx = 0;
+
+  // Skip over any unrelated deltas.
+  for (; DeltaIdx != Deltas.size() &&
+       Deltas[DeltaIdx].FileLoc < OrigOffset; ++DeltaIdx)
+    ;
+
+  // If there is no delta for this offset, insert a new delta record.
+  if (DeltaIdx == Deltas.size() || OrigOffset != Deltas[DeltaIdx].FileLoc) {
+    // If this is a removal, check to see if this can be folded into
+    // a delta at the end of the deletion.  For example, if we have:
+    //  ABCXDEF (X inserted after C) and delete C, we want to end up with no
+    // delta because X basically replaced C.
+    if (Change < 0 && DeltaIdx != Deltas.size() &&
+        OrigOffset-Change == Deltas[DeltaIdx].FileLoc) {
+      // Adjust the start of the delta to be the start of the deleted region.
+      Deltas[DeltaIdx].FileLoc += Change;
+      Deltas[DeltaIdx].Delta += Change;
+
+      // If the delta becomes a noop, remove it.
+      if (Deltas[DeltaIdx].Delta == 0)
+        Deltas.erase(Deltas.begin()+DeltaIdx);
+      return;
+    }
+
+    // Otherwise, create an entry and return.
+    Deltas.insert(Deltas.begin()+DeltaIdx,
+                  SourceDelta::get(OrigOffset, Change));
+    return;
+  }
+
+  // Otherwise, we found a delta record at this offset, adjust it.
+  Deltas[DeltaIdx].Delta += Change;
+
+  // If it is now dead, remove it.
+  if (Deltas[DeltaIdx].Delta == 0)
+    Deltas.erase(Deltas.begin()+DeltaIdx);
+}
+
+
+/// RemoveText - Remove Size characters starting at OrigOffset (an offset into
+/// the original buffer) and record the shift as a negative delta.
+void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size) {
+  // Nothing to remove, exit early.
+  if (Size == 0) return;
+
+  unsigned RealOffset = getMappedOffset(OrigOffset, true);
+  // The removed range may end exactly at the end of the buffer, so the bound
+  // is inclusive (same check as ReplaceText below).
+  assert(RealOffset+Size <= Buffer.size() && "Invalid location");
+
+  // Remove the dead characters.
+  RewriteRope::iterator I = Buffer.getAtOffset(RealOffset);
+  Buffer.erase(I, I+Size);
+
+  // Add a delta so that future changes are offset correctly.
+  AddDelta(OrigOffset, -Size);
+}
+
+/// InsertText - Insert StrLen characters from StrData at OrigOffset (an
+/// offset into the original buffer) and record the shift as a positive delta.
+void RewriteBuffer::InsertText(unsigned OrigOffset,
+                               const char *StrData, unsigned StrLen) {
+  // Nothing to insert, exit early.
+  if (StrLen == 0) return;
+
+  unsigned RealOffset = getMappedOffset(OrigOffset, true);
+  assert(RealOffset <= Buffer.size() && "Invalid location");
+
+  // Insert the new characters.
+  Buffer.insert(Buffer.getAtOffset(RealOffset), StrData, StrData+StrLen);
+
+  // Add a delta so that future changes are offset correctly.
+  AddDelta(OrigOffset, StrLen);
+}
+
+/// ReplaceText - This method replaces a range of characters in the input
+/// buffer with a new string.  This is effectively a combined "remove/insert"
+/// operation.
+void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
+                                const char *NewStr, unsigned NewLength) {
+  unsigned RealOffset = getMappedOffset(OrigOffset, true);
+  assert(RealOffset+OrigLength <= Buffer.size() && "Invalid location");
+
+  // Overwrite the common piece.
+  unsigned CommonLength = std::min(OrigLength, NewLength);
+  std::copy(NewStr, NewStr+CommonLength, Buffer.getAtOffset(RealOffset));
+
+  // If replacing without shifting around, just overwrite the text.
+  if (OrigLength == NewLength)
+    return;
+
+  // If inserting more than existed before, this is like an insertion.
+  if (NewLength > OrigLength) {
+    Buffer.insert(Buffer.getAtOffset(RealOffset+OrigLength),
+                  NewStr+OrigLength, NewStr+NewLength);
+  } else {
+    // If inserting less than existed before, this is like a removal.
+    RewriteRope::iterator I = Buffer.getAtOffset(RealOffset+NewLength);
+    Buffer.erase(I, I+(OrigLength-NewLength));
+  }
+  // The lengths differ here, so the delta is never zero.
+  AddDelta(OrigOffset, NewLength-OrigLength);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rewriter class
+//===----------------------------------------------------------------------===//
+
+/// getRangeSize - Return the size in bytes of the specified range if they
+/// are in the same file.  If not, this returns -1.
+int Rewriter::getRangeSize(SourceRange Range) const {
+  // Both endpoints must be rewritable for the range to have a size.
+  if (!isRewritable(Range.getBegin()))
+    return -1;
+  if (!isRewritable(Range.getEnd()))
+    return -1;
+
+  // Split each endpoint into its file and the offset inside that file.
+  unsigned BeginFile, EndFile;
+  unsigned BeginOff = getLocationOffsetAndFileID(Range.getBegin(), BeginFile);
+  unsigned EndOff   = getLocationOffsetAndFileID(Range.getEnd(), EndFile);
+
+  // A range whose endpoints lie in different files has no size.
+  if (BeginFile != EndFile)
+    return -1;
+
+  // When this file already has an edit buffer, earlier edits may have moved
+  // the endpoints; translate both offsets into the edited buffer.
+  typedef std::map<unsigned, RewriteBuffer>::const_iterator BufferIter;
+  BufferIter Found = RewriteBuffers.find(BeginFile);
+  if (Found != RewriteBuffers.end()) {
+    EndOff   = Found->second.getMappedOffset(EndOff, true);
+    BeginOff = Found->second.getMappedOffset(BeginOff);
+  }
+
+  // The end location is the start of the last token; extend the range so it
+  // covers that whole token.
+  EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr);
+
+  return EndOff-BeginOff;
+}
+
+
+/// getLocationOffsetAndFileID - Decompose Loc through the SourceManager:
+/// the file ID is stored in FileID and the offset in that file is returned.
+unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
+                                              unsigned &FileID) const {
+  std::pair<unsigned,unsigned> Decomposed =
+    SourceMgr->getDecomposedFileLoc(Loc);
+  FileID = Decomposed.first;
+  return Decomposed.second;
+}
+
+
+/// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
+///
+RewriteBuffer &Rewriter::getEditBuffer(unsigned FileID) {
+  typedef std::map<unsigned, RewriteBuffer>::iterator BufferIter;
+
+  // lower_bound finds an existing entry, and otherwise gives the insertion
+  // hint for the new one.
+  BufferIter Pos = RewriteBuffers.lower_bound(FileID);
+  const bool Missing = Pos == RewriteBuffers.end() || Pos->first != FileID;
+  if (Missing) {
+    Pos = RewriteBuffers.insert(Pos, std::make_pair(FileID, RewriteBuffer()));
+
+    // Initialize the new buffer from the file's data in the SourceManager.
+    std::pair<const char*, const char*> Data = SourceMgr->getBufferData(FileID);
+    Pos->second.Initialize(Data.first, Data.second);
+  }
+
+  return Pos->second;
+}
+
+/// InsertText - Insert the specified string at the specified location in the
+/// original buffer.
+bool Rewriter::InsertText(SourceLocation Loc,
+                          const char *StrData, unsigned StrLen) {
+  // Returns true when Loc cannot be rewritten, false once the edit is made.
+  if (!isRewritable(Loc)) return true;
+  unsigned FileID;
+  unsigned StartOffs = getLocationOffsetAndFileID(Loc, FileID);
+  getEditBuffer(FileID).InsertText(StartOffs, StrData, StrLen);
+  return false;
+}
+
+/// RemoveText - Remove the specified text region.  Returns true if Start is
+/// not rewritable, false once the text has been removed.
+bool Rewriter::RemoveText(SourceLocation Start, unsigned Length) {
+  if (!isRewritable(Start)) return true;
+  unsigned FileID;
+  unsigned StartOffs = getLocationOffsetAndFileID(Start, FileID);
+  getEditBuffer(FileID).RemoveText(StartOffs, Length);
+  return false;
+}
+
+/// ReplaceText - This method replaces a range of characters in the input
+/// buffer with a new string.  This is effectively a combined "remove/insert"
+/// operation.  Returns true if Start is not rewritable, false otherwise.
+bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength,
+                           const char *NewStr, unsigned NewLength) {
+  if (!isRewritable(Start)) return true;
+  unsigned StartFileID;
+  unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID);
+
+  getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength,
+                                         NewStr, NewLength);
+  return false;
+}
+
+/// ReplaceStmt - This replaces a Stmt/Expr with another, using the pretty
+/// printer to generate the replacement code.  This returns true if the input
+/// could not be rewritten, or false if successful.
+bool Rewriter::ReplaceStmt(Stmt *From, Stmt *To) {
+  // Measure the old text.
+  int Size = getRangeSize(From->getSourceRange());
+  if (Size == -1)
+    return true;
+
+  // Get the new text.
+  std::ostringstream S;
+  To->printPretty(S);
+  const std::string &Str = S.str();
+
+  // The result of ReplaceText is not checked here.
+  ReplaceText(From->getLocStart(), Size, &Str[0], Str.size());
+  return false;
+}
+
+
diff --git a/clang/lib/Sema/Makefile b/clang/lib/Sema/Makefile
new file mode 100644
index 00000000000..19a00275f0f
--- /dev/null
+++ b/clang/lib/Sema/Makefile
@@ -0,0 +1,23 @@
+##===- clang/lib/Sema/Makefile -----------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+#  This implements the semantic analyzer and AST builder library for the
+#  C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME := clangSEMA
+BUILD_ARCHIVE = 1
+CXXFLAGS = -fno-rtti
+
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include
+
+include $(LEVEL)/Makefile.common
+
diff --git a/clang/lib/Sema/ParseAST.cpp b/clang/lib/Sema/ParseAST.cpp
new file mode 100644
index 00000000000..364b0729100
--- /dev/null
+++ b/clang/lib/Sema/ParseAST.cpp
@@ -0,0 +1,69 @@
+//===--- ParseAST.cpp - Provide the clang::ParseAST method ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the clang::ParseAST method.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Sema/ParseAST.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/ASTConsumer.h" +#include "Sema.h" +#include "clang/Parse/Action.h" +#include "clang/Parse/Parser.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Public interface to the file +//===----------------------------------------------------------------------===// + +/// ParseAST - Parse the entire file specified, notifying the ASTConsumer as +/// the file is parsed. This takes ownership of the ASTConsumer and +/// ultimately deletes it. +void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, bool PrintStats) { + // Collect global stats on Decls/Stmts (until we have a module streamer). + if (PrintStats) { + Decl::CollectingStats(true); + Stmt::CollectingStats(true); + } + + ASTContext Context(PP.getSourceManager(), PP.getTargetInfo(), + PP.getIdentifierTable(), PP.getSelectorTable()); + + Parser P(PP, *new Sema(PP, Context, *Consumer)); + PP.EnterMainSourceFile(); + + // Initialize the parser. + P.Initialize(); + + Consumer->Initialize(Context); + + Parser::DeclTy *ADecl; + while (!P.ParseTopLevelDecl(ADecl)) { // Not end of file. + // If we got a null return and something *was* parsed, ignore it. This + // is due to a top-level semicolon, an action override, or a parse error + // skipping something. 
+ if (ADecl) + Consumer->HandleTopLevelDecl(static_cast<Decl*>(ADecl)); + }; + + if (PrintStats) { + fprintf(stderr, "\nSTATISTICS:\n"); + P.getActions().PrintStats(); + Context.PrintStats(); + Decl::PrintStats(); + Stmt::PrintStats(); + Consumer->PrintStats(); + + Decl::CollectingStats(false); + Stmt::CollectingStats(false); + } + + delete Consumer; +} diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp new file mode 100644 index 00000000000..4bd04e3a1a5 --- /dev/null +++ b/clang/lib/Sema/Sema.cpp @@ -0,0 +1,222 @@ +//===--- Sema.cpp - AST Builder and Semantic Analysis Implementation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the actions class which performs semantic analysis and +// builds an AST out of a parse stream. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Parse/Scope.h" + +using namespace clang; + +bool Sema::isBuiltinObjCType(TypedefDecl *TD) { + const char *typeName = TD->getIdentifier()->getName(); + return strcmp(typeName, "id") == 0 || strcmp(typeName, "Class") == 0 || + strcmp(typeName, "SEL") == 0 || strcmp(typeName, "Protocol") == 0; +} + +bool Sema::isObjCObjectPointerType(QualType type) const { + if (!type->isPointerType() && !type->isObjCQualifiedIdType()) + return false; + if (type == Context.getObjCIdType() || type == Context.getObjCClassType() || + type->isObjCQualifiedIdType()) + return true; + + if (type->isPointerType()) { + PointerType *pointerType = static_cast<PointerType*>(type.getTypePtr()); + type = pointerType->getPointeeType(); + } + return (type->isObjCInterfaceType() || 
type->isObjCQualifiedIdType()); +} + +void Sema::ActOnTranslationUnitScope(SourceLocation Loc, Scope *S) { + TUScope = S; + if (!PP.getLangOptions().ObjC1) return; + + TypedefType *t; + + // Add the built-in ObjC types. + t = cast<TypedefType>(Context.getObjCIdType().getTypePtr()); + t->getDecl()->getIdentifier()->setFETokenInfo(t->getDecl()); + TUScope->AddDecl(t->getDecl()); + t = cast<TypedefType>(Context.getObjCClassType().getTypePtr()); + t->getDecl()->getIdentifier()->setFETokenInfo(t->getDecl()); + TUScope->AddDecl(t->getDecl()); + ObjCInterfaceType *it = cast<ObjCInterfaceType>(Context.getObjCProtoType()); + ObjCInterfaceDecl *IDecl = it->getDecl(); + IDecl->getIdentifier()->setFETokenInfo(IDecl); + TUScope->AddDecl(IDecl); + + // Synthesize "typedef struct objc_selector *SEL;" + RecordDecl *SelTag = RecordDecl::Create(Context, Decl::Struct, + SourceLocation(), + &Context.Idents.get("objc_selector"), + 0); + SelTag->getIdentifier()->setFETokenInfo(SelTag); + TUScope->AddDecl(SelTag); + + QualType SelT = Context.getPointerType(Context.getTagDeclType(SelTag)); + TypedefDecl *SelTypedef = TypedefDecl::Create(Context, SourceLocation(), + &Context.Idents.get("SEL"), + SelT, 0); + SelTypedef->getIdentifier()->setFETokenInfo(SelTypedef); + TUScope->AddDecl(SelTypedef); + Context.setObjCSelType(SelTypedef); +} + +Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer) + : PP(pp), Context(ctxt), Consumer(consumer), + CurFunctionDecl(0), CurMethodDecl(0) { + + // Get IdentifierInfo objects for known functions for which we + // do extra checking. 
+ IdentifierTable &IT = PP.getIdentifierTable(); + + KnownFunctionIDs[id_printf] = &IT.get("printf"); + KnownFunctionIDs[id_fprintf] = &IT.get("fprintf"); + KnownFunctionIDs[id_sprintf] = &IT.get("sprintf"); + KnownFunctionIDs[id_snprintf] = &IT.get("snprintf"); + KnownFunctionIDs[id_asprintf] = &IT.get("asprintf"); + KnownFunctionIDs[id_vsnprintf] = &IT.get("vsnprintf"); + KnownFunctionIDs[id_vasprintf] = &IT.get("vasprintf"); + KnownFunctionIDs[id_vfprintf] = &IT.get("vfprintf"); + KnownFunctionIDs[id_vsprintf] = &IT.get("vsprintf"); + KnownFunctionIDs[id_vprintf] = &IT.get("vprintf"); + + // FIXME: Move this initialization up to Sema::ActOnTranslationUnitScope() + // and make sure the decls get inserted into TUScope! + if (PP.getLangOptions().ObjC1) { + // Synthesize "typedef struct objc_class *Class;" + RecordDecl *ClassTag = RecordDecl::Create(Context, Decl::Struct, + SourceLocation(), + &IT.get("objc_class"), 0); + QualType ClassT = Context.getPointerType(Context.getTagDeclType(ClassTag)); + TypedefDecl *ClassTypedef = + TypedefDecl::Create(Context, SourceLocation(), + &Context.Idents.get("Class"), ClassT, 0); + Context.setObjCClassType(ClassTypedef); + + // Synthesize "@class Protocol; + ObjCInterfaceDecl *ProtocolDecl = new ObjCInterfaceDecl(SourceLocation(), 0, + &Context.Idents.get("Protocol"), true); + Context.setObjCProtoType(Context.getObjCInterfaceType(ProtocolDecl)); + + // Synthesize "typedef struct objc_object { Class isa; } *id;" + RecordDecl *ObjectTag = + RecordDecl::Create(Context, Decl::Struct, SourceLocation(), + &IT.get("objc_object"), 0); + FieldDecl *IsaDecl = new FieldDecl(SourceLocation(), 0, + Context.getObjCClassType()); + ObjectTag->defineBody(&IsaDecl, 1); + QualType ObjT = Context.getPointerType(Context.getTagDeclType(ObjectTag)); + TypedefDecl *IdTypedef = TypedefDecl::Create(Context, SourceLocation(), + &Context.Idents.get("id"), + ObjT, 0); + Context.setObjCIdType(IdTypedef); + } + TUScope = 0; +} + +/// ImpCastExprToType - If 
Expr is not of type 'Type', insert an implicit cast. +/// If there is already an implicit cast, merge into the existing one. +void Sema::ImpCastExprToType(Expr *&Expr, QualType Type) { + if (Expr->getType().getCanonicalType() == Type.getCanonicalType()) return; + + if (ImplicitCastExpr *ImpCast = dyn_cast<ImplicitCastExpr>(Expr)) + ImpCast->setType(Type); + else + Expr = new ImplicitCastExpr(Type, Expr); +} + + + +void Sema::DeleteExpr(ExprTy *E) { + delete static_cast<Expr*>(E); +} +void Sema::DeleteStmt(StmtTy *S) { + delete static_cast<Stmt*>(S); +} + +//===----------------------------------------------------------------------===// +// Helper functions. +//===----------------------------------------------------------------------===// + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID) { + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg) { + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, &Msg, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2) { + std::string MsgArr[] = { Msg1, Msg2 }; + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, MsgArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, SourceRange Range) { + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, 0, 0, &Range,1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange Range) { + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, &Msg, 1, &Range,1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange Range) { + std::string MsgArr[] = { Msg1, Msg2 }; + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, MsgArr, 2, &Range, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string 
&Msg1, + const std::string &Msg2, const std::string &Msg3, + SourceRange R1) { + std::string MsgArr[] = { Msg1, Msg2, Msg3 }; + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, MsgArr, 3, &R1, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, + SourceRange R1, SourceRange R2) { + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, 0, 0, RangeArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1, SourceRange R2) { + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(PP.getFullLoc(Loc), DiagID, &Msg, 1, RangeArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Range, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange R1, SourceRange R2) { + std::string MsgArr[] = { Msg1, Msg2 }; + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(PP.getFullLoc(Range),DiagID, MsgArr,2,RangeArr, 2); + return true; +} + +const LangOptions &Sema::getLangOptions() const { + return PP.getLangOptions(); +} diff --git a/clang/lib/Sema/Sema.h b/clang/lib/Sema/Sema.h new file mode 100644 index 00000000000..fc81781911c --- /dev/null +++ b/clang/lib/Sema/Sema.h @@ -0,0 +1,823 @@ +//===--- Sema.h - Semantic Analysis & AST Building --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sema class, which performs semantic analysis and +// builds ASTs. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_SEMA_H +#define LLVM_CLANG_AST_SEMA_H + +#include "clang/Parse/Action.h" +#include "clang/Parse/DeclSpec.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <vector> +#include <string> + +namespace llvm { + class APSInt; +} + +namespace clang { + class ASTContext; + class ASTConsumer; + class Preprocessor; + class Decl; + class ScopedDecl; + class Expr; + class InitListExpr; + class CallExpr; + class VarDecl; + class ParmVarDecl; + class TypedefDecl; + class FunctionDecl; + class QualType; + struct LangOptions; + class Token; + class IntegerLiteral; + class StringLiteral; + class ArrayType; + class LabelStmt; + class SwitchStmt; + class OCUVectorType; + class TypedefDecl; + class ObjCInterfaceDecl; + class ObjCProtocolDecl; + class ObjCImplementationDecl; + class ObjCCategoryImplDecl; + class ObjCCategoryDecl; + class ObjCIvarDecl; + class ObjCMethodDecl; + +/// Sema - This implements semantic analysis and AST building for C. +class Sema : public Action { + Preprocessor &PP; + ASTContext &Context; + ASTConsumer &Consumer; + + /// CurFunctionDecl - If inside of a function body, this contains a pointer to + /// the function decl for the function being parsed. + FunctionDecl *CurFunctionDecl; + + /// CurMethodDecl - If inside of a method body, this contains a pointer to + /// the method decl for the method being parsed. + ObjCMethodDecl *CurMethodDecl; + + /// LabelMap - This is a mapping from label identifiers to the LabelStmt for + /// it (which acts like the label decl in some ways). Forward referenced + /// labels have a LabelStmt created for them with a null location & SubStmt. + llvm::DenseMap<IdentifierInfo*, LabelStmt*> LabelMap; + + llvm::SmallVector<SwitchStmt*, 8> SwitchStack; + + /// OCUVectorDecls - This is a list all the OCU vector types. 
This allows + /// us to associate a raw vector type with one of the OCU type names. + /// This is only necessary for issuing pretty diagnostics. + llvm::SmallVector<TypedefDecl*, 24> OCUVectorDecls; + + /// ObjCImplementations - Keep track of all of the classes with + /// @implementation's, so that we can emit errors on duplicates. + llvm::DenseMap<IdentifierInfo*, ObjCImplementationDecl*> ObjCImplementations; + + /// ObjCProtocols - Keep track of all protocol declarations declared + /// with @protocol keyword, so that we can emit errors on duplicates and + /// find the declarations when needed. + llvm::DenseMap<IdentifierInfo*, ObjCProtocolDecl*> ObjCProtocols; + + // Enum values used by KnownFunctionIDs (see below). + enum { + id_printf, + id_fprintf, + id_sprintf, + id_snprintf, + id_asprintf, + id_vsnprintf, + id_vasprintf, + id_vfprintf, + id_vsprintf, + id_vprintf, + id_num_known_functions + }; + + /// KnownFunctionIDs - This is a list of IdentifierInfo objects to a set + /// of known functions used by the semantic analysis to do various + /// kinds of checking (e.g. checking format string errors in printf calls). + /// This list is populated upon the creation of a Sema object. + IdentifierInfo* KnownFunctionIDs[ id_num_known_functions ]; + + /// Translation Unit Scope - useful to Objective-C actions that need + /// to lookup file scope declarations in the "ordinary" C decl namespace. + /// For example, user-defined classes, built-in "id" type, etc. + Scope *TUScope; + + /// ObjCMethodList - a linked list of methods with different signatures. + struct ObjCMethodList { + ObjCMethodDecl *Method; + ObjCMethodList *Next; + + ObjCMethodList() { + Method = 0; + Next = 0; + } + ObjCMethodList(ObjCMethodDecl *M, ObjCMethodList *C) { + Method = M; + Next = C; + } + }; + /// Instance/Factory Method Pools - allows efficient lookup when typechecking + /// messages to "id". We need to maintain a list, since selectors can have + /// differing signatures across classes. 
In Cocoa, this happens to be + /// extremely uncommon (only 1% of selectors are "overloaded"). + llvm::DenseMap<Selector, ObjCMethodList> InstanceMethodPool; + llvm::DenseMap<Selector, ObjCMethodList> FactoryMethodPool; +public: + Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer); + + const LangOptions &getLangOptions() const; + + /// The primitive diagnostic helpers - always returns true, which simplifies + /// error handling (i.e. less code). + bool Diag(SourceLocation Loc, unsigned DiagID); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2); + + /// More expressive diagnostic helpers for expressions (say that 6 times:-) + bool Diag(SourceLocation Loc, unsigned DiagID, SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, + SourceRange R1, SourceRange R2); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1, SourceRange R2); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, const std::string &Msg3, SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg1, const std::string &Msg2, + SourceRange R1, SourceRange R2); + + virtual void DeleteExpr(ExprTy *E); + virtual void DeleteStmt(StmtTy *S); + + //===--------------------------------------------------------------------===// + // Type Analysis / Processing: SemaType.cpp. 
+ // + QualType ConvertDeclSpecToType(DeclSpec &DS); + AttributeList *ProcessTypeAttributes(QualType &Result, AttributeList *AL); + QualType GetTypeForDeclarator(Declarator &D, Scope *S); + + + QualType ObjCGetTypeForMethodDefinition(DeclTy *D); + + + virtual TypeResult ActOnTypeName(Scope *S, Declarator &D); + + virtual TypeResult ActOnParamDeclaratorType(Scope *S, Declarator &D); +private: + //===--------------------------------------------------------------------===// + // Symbol table / Decl tracking callbacks: SemaDecl.cpp. + // + virtual DeclTy *isTypeName(const IdentifierInfo &II, Scope *S) const; + virtual DeclTy *ActOnDeclarator(Scope *S, Declarator &D, DeclTy *LastInGroup); + void AddInitializerToDecl(DeclTy *dcl, ExprTy *init); + virtual DeclTy *FinalizeDeclaratorGroup(Scope *S, DeclTy *Group); + + virtual DeclTy *ActOnStartOfFunctionDef(Scope *S, Declarator &D); + virtual void ObjCActOnStartOfMethodDef(Scope *S, DeclTy *D); + + virtual DeclTy *ActOnFinishFunctionBody(DeclTy *Decl, StmtTy *Body); + virtual DeclTy *ActOnLinkageSpec(SourceLocation Loc, SourceLocation LBrace, + SourceLocation RBrace, const char *Lang, + unsigned StrSize, DeclTy *D); + virtual DeclTy *ActOnFileScopeAsmDecl(SourceLocation Loc, ExprTy *expr); + + /// Scope actions. + virtual void ActOnPopScope(SourceLocation Loc, Scope *S); + virtual void ActOnTranslationUnitScope(SourceLocation Loc, Scope *S); + + /// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with + /// no declarator (e.g. "struct foo;") is parsed. + virtual DeclTy *ParsedFreeStandingDeclSpec(Scope *S, DeclSpec &DS); + + virtual DeclTy *ActOnTag(Scope *S, unsigned TagType, TagKind TK, + SourceLocation KWLoc, IdentifierInfo *Name, + SourceLocation NameLoc, AttributeList *Attr); + virtual DeclTy *ActOnField(Scope *S, DeclTy *TagDecl,SourceLocation DeclStart, + Declarator &D, ExprTy *BitfieldWidth); + + // This is used for both record definitions and ObjC interface declarations. 
+ virtual void ActOnFields(Scope* S, + SourceLocation RecLoc, DeclTy *TagDecl, + DeclTy **Fields, unsigned NumFields, + SourceLocation LBrac, SourceLocation RBrac, + tok::ObjCKeywordKind *visibility = 0); + virtual DeclTy *ActOnEnumConstant(Scope *S, DeclTy *EnumDecl, + DeclTy *LastEnumConstant, + SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation EqualLoc, ExprTy *Val); + virtual void ActOnEnumBody(SourceLocation EnumLoc, DeclTy *EnumDecl, + DeclTy **Elements, unsigned NumElements); +private: + /// Subroutines of ActOnDeclarator(). + TypedefDecl *ParseTypedefDecl(Scope *S, Declarator &D, QualType T, + ScopedDecl *LastDecl); + TypedefDecl *MergeTypeDefDecl(TypedefDecl *New, ScopedDecl *Old); + FunctionDecl *MergeFunctionDecl(FunctionDecl *New, ScopedDecl *Old); + VarDecl *MergeVarDecl(VarDecl *New, ScopedDecl *Old); + + /// More parsing and symbol table subroutines... + ParmVarDecl *ActOnParamDeclarator(struct DeclaratorChunk::ParamInfo &PI, + Scope *FnBodyScope); + ScopedDecl *LookupScopedDecl(IdentifierInfo *II, unsigned NSI, + SourceLocation IdLoc, Scope *S); + ScopedDecl *LookupInterfaceDecl(IdentifierInfo *II); + ObjCInterfaceDecl *getObjCInterfaceDecl(IdentifierInfo *Id); + ScopedDecl *LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID, Scope *S); + ScopedDecl *ImplicitlyDefineFunction(SourceLocation Loc, IdentifierInfo &II, + Scope *S); + // Decl attributes - this routine is the top level dispatcher. + void HandleDeclAttributes(Decl *New, AttributeList *declspec_prefix, + AttributeList *declarator_postfix); + void HandleDeclAttribute(Decl *New, AttributeList *rawAttr); + + /// HandleAddressSpaceTypeAttribute - this attribute is only applicable to + /// objects without automatic storage duration. + /// The raw attribute contains 1 argument, the id of the address space + /// for the type. 
+ QualType HandleAddressSpaceTypeAttribute(QualType curType, + AttributeList *rawAttr); + + // HandleVectorTypeAttribute - this attribute is only applicable to + // integral and float scalars, although arrays, pointers, and function + // return values are allowed in conjunction with this construct. Aggregates + // with this attribute are invalid, even if they are of the same size as a + // corresponding scalar. + // The raw attribute should contain precisely 1 argument, the vector size + // for the variable, measured in bytes. If curType and rawAttr are well + // formed, this routine will return a new vector type. + QualType HandleVectorTypeAttribute(QualType curType, AttributeList *rawAttr); + void HandleOCUVectorTypeAttribute(TypedefDecl *d, AttributeList *rawAttr); + + void HandleAlignedAttribute(Decl *d, AttributeList *rawAttr); + void HandlePackedAttribute(Decl *d, AttributeList *rawAttr); + void HandleAnnotateAttribute(Decl *d, AttributeList *rawAttr); + void HandleNoReturnAttribute(Decl *d, AttributeList *rawAttr); + void HandleDeprecatedAttribute(Decl *d, AttributeList *rawAttr); + void HandleWeakAttribute(Decl *d, AttributeList *rawAttr); + void HandleDLLImportAttribute(Decl *d, AttributeList *rawAttr); + void HandleDLLExportAttribute(Decl *d, AttributeList *rawAttr); + void HandleVisibilityAttribute(Decl *d, AttributeList *rawAttr); + void HandleNothrowAttribute(Decl *d, AttributeList *rawAttr); + void HandleFormatAttribute(Decl *d, AttributeList *rawAttr); + void HandleStdCallAttribute(Decl *d, AttributeList *rawAttr); + void HandleFastCallAttribute(Decl *d, AttributeList *rawAttr); + + void WarnUndefinedMethod(SourceLocation ImpLoc, ObjCMethodDecl *method, + bool &IncompleteImpl); + + /// CheckProtocolMethodDefs - This routine checks unimpletented methods + /// Declared in protocol, and those referenced by it. 
+ void CheckProtocolMethodDefs(SourceLocation ImpLoc, + ObjCProtocolDecl *PDecl, + bool& IncompleteImpl, + const llvm::DenseSet<Selector> &InsMap, + const llvm::DenseSet<Selector> &ClsMap); + + /// CheckImplementationIvars - This routine checks if the instance variables + /// listed in the implelementation match those listed in the interface. + void CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, + ObjCIvarDecl **Fields, unsigned nIvars, + SourceLocation Loc); + + /// ImplMethodsVsClassMethods - This is main routine to warn if any method + /// remains unimplemented in the @implementation class. + void ImplMethodsVsClassMethods(ObjCImplementationDecl* IMPDecl, + ObjCInterfaceDecl* IDecl); + + /// ImplCategoryMethodsVsIntfMethods - Checks that methods declared in the + /// category interface is implemented in the category @implementation. + void ImplCategoryMethodsVsIntfMethods(ObjCCategoryImplDecl *CatImplDecl, + ObjCCategoryDecl *CatClassDecl); + /// MatchTwoMethodDeclarations - Checks if two methods' type match and returns + /// true, or false, accordingly. + bool MatchTwoMethodDeclarations(const ObjCMethodDecl *Method, + const ObjCMethodDecl *PrevMethod); + + /// isBuiltinObjCType - Returns true of the type is "id", "SEL", "Class" + /// or "Protocol". + bool isBuiltinObjCType(TypedefDecl *TD); + + /// isObjCObjectPointerType - Returns true if type is an objective-c pointer + /// to an object type; such as "id", "Class", Intf*, id<P>, etc. + bool isObjCObjectPointerType(QualType type) const; + + /// AddInstanceMethodToGlobalPool - All instance methods in a translation + /// unit are added to a global pool. This allows us to efficiently associate + /// a selector with a method declaraation for purposes of typechecking + /// messages sent to "id" (where the class of the object is unknown). + void AddInstanceMethodToGlobalPool(ObjCMethodDecl *Method); + + /// AddFactoryMethodToGlobalPool - Same as above, but for factory methods. 
+ void AddFactoryMethodToGlobalPool(ObjCMethodDecl *Method); + //===--------------------------------------------------------------------===// + // Statement Parsing Callbacks: SemaStmt.cpp. +public: + virtual StmtResult ActOnExprStmt(ExprTy *Expr); + + virtual StmtResult ActOnNullStmt(SourceLocation SemiLoc); + virtual StmtResult ActOnCompoundStmt(SourceLocation L, SourceLocation R, + StmtTy **Elts, unsigned NumElts, + bool isStmtExpr); + virtual StmtResult ActOnDeclStmt(DeclTy *Decl, SourceLocation StartLoc, + SourceLocation EndLoc); + virtual StmtResult ActOnCaseStmt(SourceLocation CaseLoc, ExprTy *LHSVal, + SourceLocation DotDotDotLoc, ExprTy *RHSVal, + SourceLocation ColonLoc, StmtTy *SubStmt); + virtual StmtResult ActOnDefaultStmt(SourceLocation DefaultLoc, + SourceLocation ColonLoc, StmtTy *SubStmt, + Scope *CurScope); + virtual StmtResult ActOnLabelStmt(SourceLocation IdentLoc, IdentifierInfo *II, + SourceLocation ColonLoc, StmtTy *SubStmt); + virtual StmtResult ActOnIfStmt(SourceLocation IfLoc, ExprTy *CondVal, + StmtTy *ThenVal, SourceLocation ElseLoc, + StmtTy *ElseVal); + virtual StmtResult ActOnStartOfSwitchStmt(ExprTy *Cond); + virtual StmtResult ActOnFinishSwitchStmt(SourceLocation SwitchLoc, + StmtTy *Switch, ExprTy *Body); + virtual StmtResult ActOnWhileStmt(SourceLocation WhileLoc, ExprTy *Cond, + StmtTy *Body); + virtual StmtResult ActOnDoStmt(SourceLocation DoLoc, StmtTy *Body, + SourceLocation WhileLoc, ExprTy *Cond); + + virtual StmtResult ActOnForStmt(SourceLocation ForLoc, + SourceLocation LParenLoc, + StmtTy *First, ExprTy *Second, ExprTy *Third, + SourceLocation RParenLoc, StmtTy *Body); + virtual StmtResult ActOnObjCForCollectionStmt(SourceLocation ForColLoc, + SourceLocation LParenLoc, + StmtTy *First, ExprTy *Second, + SourceLocation RParenLoc, StmtTy *Body); + + virtual StmtResult ActOnGotoStmt(SourceLocation GotoLoc, + SourceLocation LabelLoc, + IdentifierInfo *LabelII); + virtual StmtResult ActOnIndirectGotoStmt(SourceLocation 
GotoLoc, + SourceLocation StarLoc, + ExprTy *DestExp); + virtual StmtResult ActOnContinueStmt(SourceLocation ContinueLoc, + Scope *CurScope); + virtual StmtResult ActOnBreakStmt(SourceLocation GotoLoc, Scope *CurScope); + + virtual StmtResult ActOnReturnStmt(SourceLocation ReturnLoc, + ExprTy *RetValExp); + + virtual StmtResult ActOnAsmStmt(SourceLocation AsmLoc, + bool IsSimple, + bool IsVolatile, + unsigned NumOutputs, + unsigned NumInputs, + std::string *Names, + ExprTy **Constraints, + ExprTy **Exprs, + ExprTy *AsmString, + unsigned NumClobbers, + ExprTy **Clobbers, + SourceLocation RParenLoc); + + virtual StmtResult ActOnObjCAtCatchStmt(SourceLocation AtLoc, + SourceLocation RParen, StmtTy *Parm, + StmtTy *Body, StmtTy *CatchList); + + virtual StmtResult ActOnObjCAtFinallyStmt(SourceLocation AtLoc, + StmtTy *Body); + + virtual StmtResult ActOnObjCAtTryStmt(SourceLocation AtLoc, + StmtTy *Try, + StmtTy *Catch, StmtTy *Finally); + + virtual StmtResult ActOnObjCAtThrowStmt(SourceLocation AtLoc, + StmtTy *Throw); + virtual StmtResult ActOnObjCAtSynchronizedStmt(SourceLocation AtLoc, + ExprTy *SynchExpr, + StmtTy *SynchBody); + + //===--------------------------------------------------------------------===// + // Expression Parsing Callbacks: SemaExpr.cpp. + + // Primary Expressions. + virtual ExprResult ActOnIdentifierExpr(Scope *S, SourceLocation Loc, + IdentifierInfo &II, + bool HasTrailingLParen); + virtual ExprResult ActOnPreDefinedExpr(SourceLocation Loc, + tok::TokenKind Kind); + virtual ExprResult ActOnNumericConstant(const Token &); + virtual ExprResult ActOnCharacterConstant(const Token &); + virtual ExprResult ActOnParenExpr(SourceLocation L, SourceLocation R, + ExprTy *Val); + + /// ActOnStringLiteral - The specified tokens were lexed as pasted string + /// fragments (e.g. "foo" "bar" L"baz"). + virtual ExprResult ActOnStringLiteral(const Token *Toks, unsigned NumToks); + + // Binary/Unary Operators. 'Tok' is the token for the operator. 
+ virtual ExprResult ActOnUnaryOp(SourceLocation OpLoc, tok::TokenKind Op, + ExprTy *Input); + virtual ExprResult + ActOnSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof, + SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc); + + virtual ExprResult ActOnPostfixUnaryOp(SourceLocation OpLoc, + tok::TokenKind Kind, ExprTy *Input); + + virtual ExprResult ActOnArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc, + ExprTy *Idx, SourceLocation RLoc); + virtual ExprResult ActOnMemberReferenceExpr(ExprTy *Base,SourceLocation OpLoc, + tok::TokenKind OpKind, + SourceLocation MemberLoc, + IdentifierInfo &Member); + + /// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. + /// This provides the location of the left/right parens and a list of comma + /// locations. + virtual ExprResult ActOnCallExpr(ExprTy *Fn, SourceLocation LParenLoc, + ExprTy **Args, unsigned NumArgs, + SourceLocation *CommaLocs, + SourceLocation RParenLoc); + + virtual ExprResult ActOnCastExpr(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op); + + virtual ExprResult ActOnCompoundLiteral(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op); + + virtual ExprResult ActOnInitList(SourceLocation LParenLoc, + ExprTy **InitList, unsigned NumInit, + SourceLocation RParenLoc); + + virtual ExprResult ActOnBinOp(SourceLocation TokLoc, tok::TokenKind Kind, + ExprTy *LHS,ExprTy *RHS); + + /// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null + /// in the case of a the GNU conditional expr extension. + virtual ExprResult ActOnConditionalOp(SourceLocation QuestionLoc, + SourceLocation ColonLoc, + ExprTy *Cond, ExprTy *LHS, ExprTy *RHS); + + /// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo". 
+ virtual ExprResult ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, + IdentifierInfo *LabelII); + + virtual ExprResult ActOnStmtExpr(SourceLocation LPLoc, StmtTy *SubStmt, + SourceLocation RPLoc); // "({..})" + + /// __builtin_offsetof(type, a.b[123][456].c) + virtual ExprResult ActOnBuiltinOffsetOf(SourceLocation BuiltinLoc, + SourceLocation TypeLoc, TypeTy *Arg1, + OffsetOfComponent *CompPtr, + unsigned NumComponents, + SourceLocation RParenLoc); + + // __builtin_types_compatible_p(type1, type2) + virtual ExprResult ActOnTypesCompatibleExpr(SourceLocation BuiltinLoc, + TypeTy *arg1, TypeTy *arg2, + SourceLocation RPLoc); + + // __builtin_choose_expr(constExpr, expr1, expr2) + virtual ExprResult ActOnChooseExpr(SourceLocation BuiltinLoc, + ExprTy *cond, ExprTy *expr1, ExprTy *expr2, + SourceLocation RPLoc); + + // __builtin_overload(...) + virtual ExprResult ActOnOverloadExpr(ExprTy **Args, unsigned NumArgs, + SourceLocation *CommaLocs, + SourceLocation BuiltinLoc, + SourceLocation RParenLoc); + + // __builtin_va_arg(expr, type) + virtual ExprResult ActOnVAArg(SourceLocation BuiltinLoc, + ExprTy *expr, TypeTy *type, + SourceLocation RPLoc); + + /// ActOnCXXCasts - Parse {dynamic,static,reinterpret,const}_cast's. + virtual ExprResult ActOnCXXCasts(SourceLocation OpLoc, tok::TokenKind Kind, + SourceLocation LAngleBracketLoc, TypeTy *Ty, + SourceLocation RAngleBracketLoc, + SourceLocation LParenLoc, ExprTy *E, + SourceLocation RParenLoc); + + /// ActOnCXXBoolLiteral - Parse {true,false} literals. + virtual ExprResult ActOnCXXBoolLiteral(SourceLocation OpLoc, + tok::TokenKind Kind); + + //// ActOnCXXThrow - Parse throw expressions. + virtual ExprResult ActOnCXXThrow(SourceLocation OpLoc, + ExprTy *expr); + + // ParseObjCStringLiteral - Parse Objective-C string literals. 
+ virtual ExprResult ParseObjCStringLiteral(SourceLocation *AtLocs, + ExprTy **Strings, + unsigned NumStrings); + virtual ExprResult ParseObjCEncodeExpression(SourceLocation AtLoc, + SourceLocation EncodeLoc, + SourceLocation LParenLoc, + TypeTy *Ty, + SourceLocation RParenLoc); + + // ParseObjCSelectorExpression - Build selector expression for @selector + virtual ExprResult ParseObjCSelectorExpression(Selector Sel, + SourceLocation AtLoc, + SourceLocation SelLoc, + SourceLocation LParenLoc, + SourceLocation RParenLoc); + + // ParseObjCProtocolExpression - Build protocol expression for @protocol + virtual ExprResult ParseObjCProtocolExpression(IdentifierInfo * ProtocolName, + SourceLocation AtLoc, + SourceLocation ProtoLoc, + SourceLocation LParenLoc, + SourceLocation RParenLoc); + + // Objective-C declarations. + virtual DeclTy *ActOnStartClassInterface( + SourceLocation AtInterafceLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *SuperName, SourceLocation SuperLoc, + IdentifierInfo **ProtocolNames, unsigned NumProtocols, + SourceLocation EndProtoLoc, AttributeList *AttrList); + + virtual DeclTy *ActOnCompatiblityAlias( + SourceLocation AtCompatibilityAliasLoc, + IdentifierInfo *AliasName, SourceLocation AliasLocation, + IdentifierInfo *ClassName, SourceLocation ClassLocation); + + virtual DeclTy *ActOnStartProtocolInterface( + SourceLocation AtProtoInterfaceLoc, + IdentifierInfo *ProtocolName, SourceLocation ProtocolLoc, + IdentifierInfo **ProtoRefNames, unsigned NumProtoRefs, + SourceLocation EndProtoLoc); + + virtual DeclTy *ActOnStartCategoryInterface( + SourceLocation AtInterfaceLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *CategoryName, SourceLocation CategoryLoc, + IdentifierInfo **ProtoRefNames, unsigned NumProtoRefs, + SourceLocation EndProtoLoc); + + virtual DeclTy *ActOnStartClassImplementation( + SourceLocation AtClassImplLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + 
IdentifierInfo *SuperClassname, + SourceLocation SuperClassLoc); + + virtual DeclTy *ActOnStartCategoryImplementation( + SourceLocation AtCatImplLoc, + IdentifierInfo *ClassName, + SourceLocation ClassLoc, + IdentifierInfo *CatName, + SourceLocation CatLoc); + + virtual DeclTy *ActOnForwardClassDeclaration(SourceLocation Loc, + IdentifierInfo **IdentList, + unsigned NumElts); + + virtual DeclTy *ActOnForwardProtocolDeclaration(SourceLocation AtProtocolLoc, + IdentifierInfo **IdentList, + unsigned NumElts); + + virtual void FindProtocolDeclaration(SourceLocation TypeLoc, + IdentifierInfo **ProtocolId, + unsigned NumProtocols, + llvm::SmallVector<DeclTy *, 8> & + Protocols); + + virtual void ActOnAtEnd(SourceLocation AtEndLoc, DeclTy *classDecl, + DeclTy **allMethods = 0, unsigned allNum = 0, + DeclTy **allProperties = 0, unsigned pNum = 0); + + virtual DeclTy *ActOnAddObjCProperties(SourceLocation AtLoc, + DeclTy **allProperties, + unsigned NumProperties, + ObjCDeclSpec &DS); + + virtual DeclTy *ActOnMethodDeclaration( + SourceLocation BeginLoc, // location of the + or -. + SourceLocation EndLoc, // location of the ; or {. + tok::TokenKind MethodType, + DeclTy *ClassDecl, ObjCDeclSpec &ReturnQT, TypeTy *ReturnType, + Selector Sel, + // optional arguments. The number of types/arguments is obtained + // from the Sel.getNumArgs(). + ObjCDeclSpec *ArgQT, TypeTy **ArgTypes, IdentifierInfo **ArgNames, + AttributeList *AttrList, tok::ObjCKeywordKind MethodImplKind, + bool isVariadic = false); + + // ActOnClassMessage - used for both unary and keyword messages. + // ArgExprs is optional - if it is present, the number of expressions + // is obtained from NumArgs. + virtual ExprResult ActOnClassMessage( + Scope *S, + IdentifierInfo *receivingClassName, Selector Sel, + SourceLocation lbrac, SourceLocation rbrac, + ExprTy **ArgExprs, unsigned NumArgs); + + // ActOnInstanceMessage - used for both unary and keyword messages. 
+ // ArgExprs is optional - if it is present, the number of expressions + // is obtained from NumArgs. + virtual ExprResult ActOnInstanceMessage( + ExprTy *receiver, Selector Sel, + SourceLocation lbrac, SourceLocation rbrac, + ExprTy **ArgExprs, unsigned NumArgs); +private: + /// ImpCastExprToType - If Expr is not of type 'Type', insert an implicit + /// cast. If there is already an implicit cast, merge into the existing one. + void ImpCastExprToType(Expr *&Expr, QualType Type); + + // UsualUnaryConversions - promotes integers (C99 6.3.1.1p2) and converts + // functions and arrays to their respective pointers (C99 6.3.2.1). + Expr *UsualUnaryConversions(Expr *&expr); + + // DefaultFunctionArrayConversion - converts functions and arrays + // to their respective pointers (C99 6.3.2.1). + void DefaultFunctionArrayConversion(Expr *&expr); + + // DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that + // do not have a prototype. Integer promotions are performed on each + // argument, and arguments that have type float are promoted to double. + void DefaultArgumentPromotion(Expr *&Expr); + + // UsualArithmeticConversions - performs the UsualUnaryConversions on it's + // operands and then handles various conversions that are common to binary + // operators (C99 6.3.1.8). If both operands aren't arithmetic, this + // routine returns the first non-arithmetic type found. The client is + // responsible for emitting appropriate error diagnostics. + QualType UsualArithmeticConversions(Expr *&lExpr, Expr *&rExpr, + bool isCompAssign = false); + + /// AssignConvertType - All of the 'assignment' semantic checks return this + /// enum to indicate whether the assignment was allowed. These checks are + /// done for simple assignments, as well as initialization, return from + /// function, argument passing, etc. The query is phrased in terms of a + /// source and destination type. 
+ enum AssignConvertType { + /// Compatible - the types are compatible according to the standard. + Compatible, + + /// PointerToInt - The assignment converts a pointer to an int, which we + /// accept as an extension. + PointerToInt, + + /// IntToPointer - The assignment converts an int to a pointer, which we + /// accept as an extension. + IntToPointer, + + /// FunctionVoidPointer - The assignment is between a function pointer and + /// void*, which the standard doesn't allow, but we accept as an extension. + FunctionVoidPointer, + + /// IncompatiblePointer - The assignment is between two pointers types that + /// are not compatible, but we accept them as an extension. + IncompatiblePointer, + + /// CompatiblePointerDiscardsQualifiers - The assignment discards + /// c/v/r qualifiers, which we accept as an extension. + CompatiblePointerDiscardsQualifiers, + + /// Incompatible - We reject this conversion outright, it is invalid to + /// represent it in the AST. + Incompatible + }; + + /// DiagnoseAssignmentResult - Emit a diagnostic, if required, for the + /// assignment conversion type specified by ConvTy. This returns true if the + /// conversion was invalid or false if the conversion was accepted. + bool DiagnoseAssignmentResult(AssignConvertType ConvTy, + SourceLocation Loc, + QualType DstType, QualType SrcType, + Expr *SrcExpr, const char *Flavor); + + /// CheckAssignmentConstraints - Perform type checking for assignment, + /// argument passing, variable initialization, and function return values. + /// This routine is only used by the following two methods. C99 6.5.16. + AssignConvertType CheckAssignmentConstraints(QualType lhs, QualType rhs); + + // CheckSingleAssignmentConstraints - Currently used by ActOnCallExpr, + // CheckAssignmentOperands, and ActOnReturnStmt. Prior to type checking, + // this routine performs the default function/array converions. 
+ AssignConvertType CheckSingleAssignmentConstraints(QualType lhs, + Expr *&rExpr); + // CheckCompoundAssignmentConstraints - Type check without performing any + // conversions. For compound assignments, the "Check...Operands" methods + // perform the necessary conversions. + AssignConvertType CheckCompoundAssignmentConstraints(QualType lhs, + QualType rhs); + + // Helper function for CheckAssignmentConstraints (C99 6.5.16.1p1) + AssignConvertType CheckPointerTypesForAssignment(QualType lhsType, + QualType rhsType); + + /// the following "Check" methods will return a valid/converted QualType + /// or a null QualType (indicating an error diagnostic was issued). + + /// type checking binary operators (subroutines of ActOnBinOp). + inline QualType InvalidOperands(SourceLocation l, Expr *&lex, Expr *&rex); + inline QualType CheckVectorOperands(SourceLocation l, Expr *&lex, Expr *&rex); + inline QualType CheckMultiplyDivideOperands( // C99 6.5.5 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckRemainderOperands( // C99 6.5.5 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckAdditionOperands( // C99 6.5.6 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckSubtractionOperands( // C99 6.5.6 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckShiftOperands( // C99 6.5.7 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckCompareOperands( // C99 6.5.8/9 + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isRelational); + inline QualType CheckBitwiseOperands( // C99 6.5.[10...12] + Expr *&lex, Expr *&rex, SourceLocation OpLoc, bool isCompAssign = false); + inline QualType CheckLogicalOperands( // C99 6.5.[13,14] + Expr *&lex, Expr *&rex, SourceLocation OpLoc); + // CheckAssignmentOperands is used for both simple and compound assignment. 
+ // For simple assignment, pass both expressions and a null converted type. + // For compound assignment, pass both expressions and the converted type. + inline QualType CheckAssignmentOperands( // C99 6.5.16.[1,2] + Expr *lex, Expr *&rex, SourceLocation OpLoc, QualType convertedType); + inline QualType CheckCommaOperands( // C99 6.5.17 + Expr *&lex, Expr *&rex, SourceLocation OpLoc); + inline QualType CheckConditionalOperands( // C99 6.5.15 + Expr *&cond, Expr *&lhs, Expr *&rhs, SourceLocation questionLoc); + + /// type checking unary operators (subroutines of ActOnUnaryOp). + /// C99 6.5.3.1, 6.5.3.2, 6.5.3.4 + QualType CheckIncrementDecrementOperand(Expr *op, SourceLocation OpLoc); + QualType CheckAddressOfOperand(Expr *op, SourceLocation OpLoc); + QualType CheckIndirectionOperand(Expr *op, SourceLocation OpLoc); + QualType CheckSizeOfAlignOfOperand(QualType type, SourceLocation loc, + bool isSizeof); + QualType CheckRealImagOperand(Expr *&Op, SourceLocation OpLoc); + + /// type checking primary expressions. + QualType CheckOCUVectorComponent(QualType baseType, SourceLocation OpLoc, + IdentifierInfo &Comp, SourceLocation CmpLoc); + + /// type checking declaration initializers (C99 6.7.8) + bool CheckInitializerTypes(Expr *&simpleInit_or_initList, QualType &declType); + bool CheckSingleInitializer(Expr *&simpleInit, QualType declType); + bool CheckInitExpr(Expr *expr, InitListExpr *IList, unsigned slot, + QualType ElementType); + bool CheckInitializerListTypes(InitListExpr*& IList, QualType &DeclType, + bool topLevel, unsigned& startIndex); + bool CheckForConstantInitializer(Expr *e, QualType t); + + StringLiteral *IsStringLiteralInit(Expr *Init, QualType DeclType); + bool CheckStringLiteralInit(StringLiteral *strLiteral, QualType &DeclT); + + // CheckVectorCast - check type constraints for vectors. + // Since vectors are an extension, there are no C standard reference for this. + // We allow casting between vectors and integer datatypes of the same size. 
+ // returns true if the cast is invalid + bool CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty); + + // returns true if there were any incompatible arguments. + bool CheckMessageArgumentTypes(Expr **Args, unsigned NumArgs, + ObjCMethodDecl *Method); + + /// ConvertIntegerToTypeWarnOnOverflow - Convert the specified APInt to have + /// the specified width and sign. If an overflow occurs, detect it and emit + /// the specified diagnostic. + void ConvertIntegerToTypeWarnOnOverflow(llvm::APSInt &OldVal, + unsigned NewWidth, bool NewSign, + SourceLocation Loc, unsigned DiagID); + + void InitBuiltinVaListType(); + + //===--------------------------------------------------------------------===// + // Extra semantic analysis beyond the C type system +private: + bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); + bool CheckBuiltinCFStringArgument(Expr* Arg); + bool SemaBuiltinVAStart(CallExpr *TheCall); + bool SemaBuiltinUnorderedCompare(CallExpr *TheCall); + void CheckPrintfArguments(CallExpr *TheCall, + bool HasVAListArg, unsigned format_idx); + void CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, + SourceLocation ReturnLoc); + void CheckFloatComparison(SourceLocation loc, Expr* lex, Expr* rex); +}; + + +} // end namespace clang + +#endif diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp new file mode 100644 index 00000000000..6d3eea5e730 --- /dev/null +++ b/clang/lib/Sema/SemaChecking.cpp @@ -0,0 +1,802 @@ +//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements extra semantic analysis beyond what is enforced +// by the C type system. 
+// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "SemaUtil.h" +using namespace clang; + +/// CheckFunctionCall - Check a direct function call for various correctness +/// and safety properties not strictly enforced by the C type system. +bool +Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { + + // Get the IdentifierInfo* for the called function. + IdentifierInfo *FnInfo = FDecl->getIdentifier(); + + switch (FnInfo->getBuiltinID()) { + case Builtin::BI__builtin___CFStringMakeConstantString: + assert(TheCall->getNumArgs() == 1 && + "Wrong # arguments to builtin CFStringMakeConstantString"); + return CheckBuiltinCFStringArgument(TheCall->getArg(0)); + case Builtin::BI__builtin_va_start: + return SemaBuiltinVAStart(TheCall); + + case Builtin::BI__builtin_isgreater: + case Builtin::BI__builtin_isgreaterequal: + case Builtin::BI__builtin_isless: + case Builtin::BI__builtin_islessequal: + case Builtin::BI__builtin_islessgreater: + case Builtin::BI__builtin_isunordered: + return SemaBuiltinUnorderedCompare(TheCall); + } + + // Search the KnownFunctionIDs for the identifier. + unsigned i = 0, e = id_num_known_functions; + for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } + if (i == e) return false; + + // Printf checking. + if (i <= id_vprintf) { + // Retrieve the index of the format string parameter and determine + // if the function is passed a va_arg argument. 
+ unsigned format_idx = 0; + bool HasVAListArg = false; + + switch (i) { + default: assert(false && "No format string argument index."); + case id_printf: format_idx = 0; break; + case id_fprintf: format_idx = 1; break; + case id_sprintf: format_idx = 1; break; + case id_snprintf: format_idx = 2; break; + case id_asprintf: format_idx = 1; break; + case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; + case id_vasprintf: format_idx = 1; HasVAListArg = true; break; + case id_vfprintf: format_idx = 1; HasVAListArg = true; break; + case id_vsprintf: format_idx = 1; HasVAListArg = true; break; + case id_vprintf: format_idx = 0; HasVAListArg = true; break; + } + + CheckPrintfArguments(TheCall, HasVAListArg, format_idx); + } + + return false; +} + +/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin +/// CFString constructor is correct +bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { + Arg = Arg->IgnoreParenCasts(); + + StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); + + if (!Literal || Literal->isWide()) { + Diag(Arg->getLocStart(), + diag::err_cfstring_literal_not_string_constant, + Arg->getSourceRange()); + return true; + } + + const char *Data = Literal->getStrData(); + unsigned Length = Literal->getByteLength(); + + for (unsigned i = 0; i < Length; ++i) { + if (!isascii(Data[i])) { + Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), + diag::warn_cfstring_literal_contains_non_ascii_character, + Arg->getSourceRange()); + break; + } + + if (!Data[i]) { + Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), + diag::warn_cfstring_literal_contains_nul_character, + Arg->getSourceRange()); + break; + } + } + + return false; +} + +/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. +/// Emit an error and return true on failure, return false on success. 
+bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { + Expr *Fn = TheCall->getCallee(); + if (TheCall->getNumArgs() > 2) { + Diag(TheCall->getArg(2)->getLocStart(), + diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), + SourceRange(TheCall->getArg(2)->getLocStart(), + (*(TheCall->arg_end()-1))->getLocEnd())); + return true; + } + + // Determine whether the current function is variadic or not. + bool isVariadic; + if (CurFunctionDecl) + isVariadic = + cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic(); + else + isVariadic = CurMethodDecl->isVariadic(); + + if (!isVariadic) { + Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); + return true; + } + + // Verify that the second argument to the builtin is the last argument of the + // current function or method. + bool SecondArgIsLastNamedArgument = false; + const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); + + if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { + if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { + // FIXME: This isn't correct for methods (results in bogus warning). + // Get the last formal in the current function. + const ParmVarDecl *LastArg; + if (CurFunctionDecl) + LastArg = *(CurFunctionDecl->param_end()-1); + else + LastArg = *(CurMethodDecl->param_end()-1); + SecondArgIsLastNamedArgument = PV == LastArg; + } + } + + if (!SecondArgIsLastNamedArgument) + Diag(TheCall->getArg(1)->getLocStart(), + diag::warn_second_parameter_of_va_start_not_last_named_argument); + return false; +} + +/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and +/// friends. This is declared to take (...), so we have to check everything. 
+bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { + if (TheCall->getNumArgs() < 2) + return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); + if (TheCall->getNumArgs() > 2) + return Diag(TheCall->getArg(2)->getLocStart(), + diag::err_typecheck_call_too_many_args, + SourceRange(TheCall->getArg(2)->getLocStart(), + (*(TheCall->arg_end()-1))->getLocEnd())); + + Expr *OrigArg0 = TheCall->getArg(0); + Expr *OrigArg1 = TheCall->getArg(1); + + // Do standard promotions between the two arguments, returning their common + // type. + QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); + + // If the common type isn't a real floating type, then the arguments were + // invalid for this operation. + if (!Res->isRealFloatingType()) + return Diag(OrigArg0->getLocStart(), + diag::err_typecheck_call_invalid_ordered_compare, + OrigArg0->getType().getAsString(), + OrigArg1->getType().getAsString(), + SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); + + return false; +} + + +/// CheckPrintfArguments - Check calls to printf (and similar functions) for +/// correct use of format strings. +/// +/// HasVAListArg - A predicate indicating whether the printf-like +/// function is passed an explicit va_arg argument (e.g., vprintf) +/// +/// format_idx - The index into Args for the format string. +/// +/// Improper format strings to functions in the printf family can be +/// the source of bizarre bugs and very serious security holes. A +/// good source of information is available in the following paper +/// (which includes additional references): +/// +/// FormatGuard: Automatic Protection From printf Format String +/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 
+/// +/// Functionality implemented: +/// +/// We can statically check the following properties for string +/// literal format strings for non v.*printf functions (where the +/// arguments are passed directly): +// +/// (1) Are the number of format conversions equal to the number of +/// data arguments? +/// +/// (2) Does each format conversion correctly match the type of the +/// corresponding data argument? (TODO) +/// +/// Moreover, for all printf functions we can: +/// +/// (3) Check for a missing format string (when not caught by type checking). +/// +/// (4) Check for no-operation flags; e.g. using "#" with format +/// conversion 'c' (TODO) +/// +/// (5) Check the use of '%n', a major source of security holes. +/// +/// (6) Check for malformed format conversions that don't specify anything. +/// +/// (7) Check for empty format strings. e.g: printf(""); +/// +/// (8) Check that the format string is a wide literal. +/// +/// (9) Also check the arguments of functions with the __format__ attribute. +/// (TODO). +/// +/// All of these checks can be done by parsing the format string. +/// +/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). +void +Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, + unsigned format_idx) { + Expr *Fn = TheCall->getCallee(); + + // CHECK: printf-like function is called with no format string. + if (format_idx >= TheCall->getNumArgs()) { + Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, + Fn->getSourceRange()); + return; + } + + Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); + + // CHECK: format string is not a string literal. + // + // Dynamically generated format strings are difficult to + // automatically vet at compile time. Requiring that format strings + // are string literals: (1) permits the checking of format strings by + // the compiler and thereby (2) can practically remove the source of + // many format string exploits. 
+ StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); + if (FExpr == NULL) { + // For vprintf* functions (i.e., HasVAListArg==true), we add a + // special check to see if the format string is a function parameter + // of the function calling the printf function. If the function + // has an attribute indicating it is a printf-like function, then we + // should suppress warnings concerning non-literals being used in a call + // to a vprintf function. For example: + // + // void + // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { + // va_list ap; + // va_start(ap, fmt); + // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". + // ... + // + // + // FIXME: We don't have full attribute support yet, so just check to see + // if the argument is a DeclRefExpr that references a parameter. We'll + // add proper support for checking the attribute later. + if (HasVAListArg) + if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) + if (isa<ParmVarDecl>(DR->getDecl())) + return; + + Diag(TheCall->getArg(format_idx)->getLocStart(), + diag::warn_printf_not_string_constant, Fn->getSourceRange()); + return; + } + + // CHECK: is the format string a wide literal? + if (FExpr->isWide()) { + Diag(FExpr->getLocStart(), + diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); + return; + } + + // Str - The format string. NOTE: this is NOT null-terminated! + const char * const Str = FExpr->getStrData(); + + // CHECK: empty format string? + const unsigned StrLen = FExpr->getByteLength(); + + if (StrLen == 0) { + Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, + Fn->getSourceRange()); + return; + } + + // We process the format string using a binary state machine. The + // current state is stored in CurrentState. + enum { + state_OrdChr, + state_Conversion + } CurrentState = state_OrdChr; + + // numConversions - The number of conversions seen so far. This is + // incremented as we traverse the format string. 
+ unsigned numConversions = 0; + + // numDataArgs - The number of data arguments after the format + // string. This can only be determined for non vprintf-like + // functions. For those functions, this value is 1 (the sole + // va_arg argument). + unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); + + // Inspect the format string. + unsigned StrIdx = 0; + + // LastConversionIdx - Index within the format string where we last saw + // a '%' character that starts a new format conversion. + unsigned LastConversionIdx = 0; + + for (; StrIdx < StrLen; ++StrIdx) { + + // Is the number of detected conversion conversions greater than + // the number of matching data arguments? If so, stop. + if (!HasVAListArg && numConversions > numDataArgs) break; + + // Handle "\0" + if (Str[StrIdx] == '\0') { + // The string returned by getStrData() is not null-terminated, + // so the presence of a null character is likely an error. + Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), + diag::warn_printf_format_string_contains_null_char, + Fn->getSourceRange()); + return; + } + + // Ordinary characters (not processing a format conversion). + if (CurrentState == state_OrdChr) { + if (Str[StrIdx] == '%') { + CurrentState = state_Conversion; + LastConversionIdx = StrIdx; + } + continue; + } + + // Seen '%'. Now processing a format conversion. + switch (Str[StrIdx]) { + // Handle dynamic precision or width specifier. + case '*': { + ++numConversions; + + if (!HasVAListArg && numConversions > numDataArgs) { + SourceLocation Loc = FExpr->getLocStart(); + Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); + + if (Str[StrIdx-1] == '.') + Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, + Fn->getSourceRange()); + else + Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, + Fn->getSourceRange()); + + // Don't do any more checking. We'll just emit spurious errors. + return; + } + + // Perform type checking on width/precision specifier. 
+ Expr *E = TheCall->getArg(format_idx+numConversions); + if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) + if (BT->getKind() == BuiltinType::Int) + break; + + SourceLocation Loc = + PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); + + if (Str[StrIdx-1] == '.') + Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, + E->getType().getAsString(), E->getSourceRange()); + else + Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, + E->getType().getAsString(), E->getSourceRange()); + + break; + } + + // Characters which can terminate a format conversion + // (e.g. "%d"). Characters that specify length modifiers or + // other flags are handled by the default case below. + // + // FIXME: additional checks will go into the following cases. + case 'i': + case 'd': + case 'o': + case 'u': + case 'x': + case 'X': + case 'D': + case 'O': + case 'U': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'a': + case 'A': + case 'c': + case 'C': + case 'S': + case 's': + case 'p': + ++numConversions; + CurrentState = state_OrdChr; + break; + + // CHECK: Are we using "%n"? Issue a warning. + case 'n': { + ++numConversions; + CurrentState = state_OrdChr; + SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); + break; + } + + // Handle "%%" + case '%': + // Sanity check: Was the first "%" character the previous one? + // If not, we will assume that we have a malformed format + // conversion, and that the current "%" character is the start + // of a new conversion. + if (StrIdx - LastConversionIdx == 1) + CurrentState = state_OrdChr; + else { + // Issue a warning: invalid format conversion. 
+ SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_invalid_conversion, + std::string(Str+LastConversionIdx, Str+StrIdx), + Fn->getSourceRange()); + + // This conversion is broken. Advance to the next format + // conversion. + LastConversionIdx = StrIdx; + ++numConversions; + } + break; + + default: + // This case catches all other characters: flags, widths, etc. + // We should eventually process those as well. + break; + } + } + + if (CurrentState == state_Conversion) { + // Issue a warning: invalid format conversion. + SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_invalid_conversion, + std::string(Str+LastConversionIdx, + Str+std::min(LastConversionIdx+2, StrLen)), + Fn->getSourceRange()); + return; + } + + if (!HasVAListArg) { + // CHECK: Does the number of format conversions exceed the number + // of data arguments? + if (numConversions > numDataArgs) { + SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), + LastConversionIdx); + + Diag(Loc, diag::warn_printf_insufficient_data_args, + Fn->getSourceRange()); + } + // CHECK: Does the number of data arguments exceed the number of + // format conversions in the format string? + else if (numConversions < numDataArgs) + Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), + diag::warn_printf_too_many_data_args, Fn->getSourceRange()); + } +} + +//===--- CHECK: Return Address of Stack Variable --------------------------===// + +static DeclRefExpr* EvalVal(Expr *E); +static DeclRefExpr* EvalAddr(Expr* E); + +/// CheckReturnStackAddr - Check if a return statement returns the address +/// of a stack variable. +void +Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, + SourceLocation ReturnLoc) { + + // Perform checking for returned stack addresses. 
+ if (lhsType->isPointerType()) { + if (DeclRefExpr *DR = EvalAddr(RetValExp)) + Diag(DR->getLocStart(), diag::warn_ret_stack_addr, + DR->getDecl()->getIdentifier()->getName(), + RetValExp->getSourceRange()); + } + // Perform checking for stack values returned by reference. + else if (lhsType->isReferenceType()) { + // Check for an implicit cast to a reference. + if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) + if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) + Diag(DR->getLocStart(), diag::warn_ret_stack_ref, + DR->getDecl()->getIdentifier()->getName(), + RetValExp->getSourceRange()); + } +} + +/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that +/// check if the expression in a return statement evaluates to an address +/// to a location on the stack. The recursion is used to traverse the +/// AST of the return expression, with recursion backtracking when we +/// encounter a subexpression that (1) clearly does not lead to the address +/// of a stack variable or (2) is something we cannot determine leads to +/// the address of a stack variable based on such local checking. +/// +/// EvalAddr processes expressions that are pointers that are used as +/// references (and not L-values). EvalVal handles all other values. +/// At the base case of the recursion is a check for a DeclRefExpr* in +/// the refers to a stack variable. +/// +/// This implementation handles: +/// +/// * pointer-to-pointer casts +/// * implicit conversions from array references to pointers +/// * taking the address of fields +/// * arbitrary interplay between "&" and "*" operators +/// * pointer arithmetic from an address of a stack variable +/// * taking the address of an array element where the array is on the stack +static DeclRefExpr* EvalAddr(Expr *E) { + // We should only be called for evaluating pointer expressions. 
+ assert((E->getType()->isPointerType() || + E->getType()->isObjCQualifiedIdType()) && + "EvalAddr only works on pointers"); + + // Our "symbolic interpreter" is just a dispatch off the currently + // viewed AST node. We then recursively traverse the AST by calling + // EvalAddr and EvalVal appropriately. + switch (E->getStmtClass()) { + case Stmt::ParenExprClass: + // Ignore parentheses. + return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); + + case Stmt::UnaryOperatorClass: { + // The only unary operator that make sense to handle here + // is AddrOf. All others don't make sense as pointers. + UnaryOperator *U = cast<UnaryOperator>(E); + + if (U->getOpcode() == UnaryOperator::AddrOf) + return EvalVal(U->getSubExpr()); + else + return NULL; + } + + case Stmt::BinaryOperatorClass: { + // Handle pointer arithmetic. All other binary operators are not valid + // in this context. + BinaryOperator *B = cast<BinaryOperator>(E); + BinaryOperator::Opcode op = B->getOpcode(); + + if (op != BinaryOperator::Add && op != BinaryOperator::Sub) + return NULL; + + Expr *Base = B->getLHS(); + + // Determine which argument is the real pointer base. It could be + // the RHS argument instead of the LHS. + if (!Base->getType()->isPointerType()) Base = B->getRHS(); + + assert (Base->getType()->isPointerType()); + return EvalAddr(Base); + } + + // For conditional operators we need to see if either the LHS or RHS are + // valid DeclRefExpr*s. If one of them is valid, we return it. + case Stmt::ConditionalOperatorClass: { + ConditionalOperator *C = cast<ConditionalOperator>(E); + + // Handle the GNU extension for missing LHS. + if (Expr *lhsExpr = C->getLHS()) + if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) + return LHS; + + return EvalAddr(C->getRHS()); + } + + // For implicit casts, we need to handle conversions from arrays to + // pointer values, and implicit pointer-to-pointer conversions. 
+ case Stmt::ImplicitCastExprClass: { + ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); + Expr* SubExpr = IE->getSubExpr(); + + if (SubExpr->getType()->isPointerType() || + SubExpr->getType()->isObjCQualifiedIdType()) + return EvalAddr(SubExpr); + else + return EvalVal(SubExpr); + } + + // For casts, we handle pointer-to-pointer conversions (which + // is essentially a no-op from our mini-interpreter's standpoint). + // For other casts we abort. + case Stmt::CastExprClass: { + CastExpr *C = cast<CastExpr>(E); + Expr *SubExpr = C->getSubExpr(); + + if (SubExpr->getType()->isPointerType()) + return EvalAddr(SubExpr); + else + return NULL; + } + + // C++ casts.  For dynamic casts, static casts, and const casts, we + // are always converting from a pointer-to-pointer, so we just blow + // through the cast.  In the case the dynamic cast doesn't fail + // (and return NULL), we take the conservative route and report cases + // where we return the address of a stack variable.  For reinterpret + // casts, we only look through the cast when the operand is itself a + // pointer; otherwise we give up. + case Stmt::CXXCastExprClass: { + CXXCastExpr *C = cast<CXXCastExpr>(E); + + if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { + Expr *S = C->getSubExpr(); + if (S->getType()->isPointerType()) + return EvalAddr(S); + else + return NULL; + } + else + return EvalAddr(C->getSubExpr()); + } + + // Everything else: we simply don't reason about them. + default: + return NULL; + } +} + + +/// EvalVal - This function complements EvalAddr in the mutual recursion. +/// See the comments for EvalAddr for more details. +static DeclRefExpr* EvalVal(Expr *E) { + + // We should only be called for evaluating non-pointer expressions, or + // expressions with a pointer type that are not used as references but instead + // are l-values (e.g., DeclRefExpr with a pointer type). + + // Our "symbolic interpreter" is just a dispatch off the currently + // viewed AST node.  We then recursively traverse the AST by calling + // EvalAddr and EvalVal appropriately.
+ switch (E->getStmtClass()) { + case Stmt::DeclRefExprClass: { + // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking + // at code that refers to a variable's name.  We check if it has local + // storage within the function, and if so, return the expression. + DeclRefExpr *DR = cast<DeclRefExpr>(E); + + if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) + if(V->hasLocalStorage()) return DR; + + return NULL; + } + + case Stmt::ParenExprClass: + // Ignore parentheses. + return EvalVal(cast<ParenExpr>(E)->getSubExpr()); + + case Stmt::UnaryOperatorClass: { + // The only unary operator that makes sense to handle here + // is Deref.  All others don't resolve to a "name."  This includes + // handling all sorts of rvalues passed to a unary operator. + UnaryOperator *U = cast<UnaryOperator>(E); + + if (U->getOpcode() == UnaryOperator::Deref) + return EvalAddr(U->getSubExpr()); + + return NULL; + } + + case Stmt::ArraySubscriptExprClass: { + // Array subscripts are potential references to data on the stack.  We + // retrieve the DeclRefExpr* for the array variable if it indeed + // has local storage. + return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); + } + + case Stmt::ConditionalOperatorClass: { + // For conditional operators we need to see if either the LHS or RHS are + // non-NULL DeclRefExpr's.  If one is non-NULL, we return it. + ConditionalOperator *C = cast<ConditionalOperator>(E); + + // Handle the GNU extension for missing LHS. + if (Expr *lhsExpr = C->getLHS()) + if (DeclRefExpr *LHS = EvalVal(lhsExpr)) + return LHS; + + return EvalVal(C->getRHS()); + } + + // Accesses to members are potential references to data on the stack. + case Stmt::MemberExprClass: { + MemberExpr *M = cast<MemberExpr>(E); + + // Check for indirect access.  We only want direct field accesses. + if (!M->isArrow()) + return EvalVal(M->getBase()); + else + return NULL; + } + + // Everything else: we simply don't reason about them.
+ default: + return NULL; + } +} + +//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// + +/// Check for comparisons of floating point operands using != and ==. +/// Issue a warning if these are not self-comparisons, as they are not likely +/// to do what the programmer intended. +void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { + bool EmitWarning = true; + + Expr* LeftExprSansParen = lex->IgnoreParens(); + Expr* RightExprSansParen = rex->IgnoreParens(); + + // Special case: check for x == x (which is OK). + // Do not emit warnings for such cases. + if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) + if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) + if (DRL->getDecl() == DRR->getDecl()) + EmitWarning = false; + + + // Special case: check for comparisons against literals that can be exactly + // represented by APFloat.  In such cases, do not emit a warning.  This + // is a heuristic: often comparison against such literals are used to + // detect if a value in a variable has not changed.  This clearly can + // lead to false negatives. + if (EmitWarning) { + if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { + if (FLL->isExact()) + EmitWarning = false; + } + else + if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ + if (FLR->isExact()) + EmitWarning = false; + } + } + + // Do not warn when either operand is a call for which isCallBuiltin() + // returns true. + if (EmitWarning) + if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) + if (isCallBuiltin(CL)) + EmitWarning = false; + + if (EmitWarning) + if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) + if (isCallBuiltin(CR)) + EmitWarning = false; + + // Emit the diagnostic.
+ if (EmitWarning) + Diag(loc, diag::warn_floatingpoint_eq, + lex->getSourceRange(),rex->getSourceRange()); +} diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp new file mode 100644 index 00000000000..7914e865741 --- /dev/null +++ b/clang/lib/Sema/SemaDecl.cpp @@ -0,0 +1,2297 @@ +//===--- SemaDecl.cpp - Semantic Analysis for Declarations ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for declarations. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Builtins.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/SourceManager.h" +// FIXME: layering (ideally, Sema shouldn't be dependent on Lex API's) +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseSet.h" +using namespace clang; + +Sema::DeclTy *Sema::isTypeName(const IdentifierInfo &II, Scope *S) const { + Decl *IIDecl = II.getFETokenInfo<Decl>(); + // Find the first occurrence of a non-tag declaration, i.e. the first decl + // on the chain that is in the ordinary identifier namespace. + while(IIDecl && IIDecl->getIdentifierNamespace() != Decl::IDNS_Ordinary) + IIDecl = cast<ScopedDecl>(IIDecl)->getNext(); + if (!IIDecl) + return 0; + if (isa<TypedefDecl>(IIDecl) || isa<ObjCInterfaceDecl>(IIDecl)) + return IIDecl; + if (ObjCCompatibleAliasDecl *ADecl = + dyn_cast<ObjCCompatibleAliasDecl>(IIDecl)) + return
ADecl->getClassInterface(); + return 0; +} + +void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) { + if (S->decl_empty()) return; + assert((S->getFlags() & Scope::DeclScope) &&"Scope shouldn't contain decls!"); + + for (Scope::decl_iterator I = S->decl_begin(), E = S->decl_end(); + I != E; ++I) { + Decl *TmpD = static_cast<Decl*>(*I); + assert(TmpD && "This decl didn't get pushed??"); + ScopedDecl *D = dyn_cast<ScopedDecl>(TmpD); + assert(D && "This decl isn't a ScopedDecl?"); + + IdentifierInfo *II = D->getIdentifier(); + if (!II) continue; + + // Unlink this decl from the identifier.  Because the scope contains decls + // in an unordered collection, and because we have multiple identifier + // namespaces (e.g. tag, normal, label), the decl may not be the first entry. + if (II->getFETokenInfo<Decl>() == D) { + // Normal case, no multiple decls in different namespaces. + II->setFETokenInfo(D->getNext()); + } else { + // Scan ahead.  There are only three namespaces in C, so this loop can + // never execute more than 3 times. + ScopedDecl *SomeDecl = II->getFETokenInfo<ScopedDecl>(); + while (SomeDecl->getNext() != D) { + SomeDecl = SomeDecl->getNext(); + assert(SomeDecl && "Didn't find this decl on its identifier's chain!"); + } + SomeDecl->setNext(D->getNext()); + } + + // This will have to be revisited for C++: there we want to nest stuff in + // namespace decls etc.  Even for C, we might want a top-level translation + // unit decl or something. + if (!CurFunctionDecl) + continue; + + // Chain this decl to the containing function, it now owns the memory for + // the decl. + D->setNext(CurFunctionDecl->getDeclChain()); + CurFunctionDecl->setDeclChain(D); + } +} + +/// LookupInterfaceDecl - Lookup interface declaration in the scope chain. +/// Return the first declaration found (which may or may not be a class +/// declaration). The caller is responsible for handling the non-class case. +/// An alias of a class is bypassed by returning the aliased class.
+ScopedDecl *Sema::LookupInterfaceDecl(IdentifierInfo *ClassName) { + ScopedDecl *IDecl; + // Scan up the scope chain looking for a decl that matches this identifier + // that is in the appropriate namespace. + for (IDecl = ClassName->getFETokenInfo<ScopedDecl>(); IDecl; + IDecl = IDecl->getNext()) + if (IDecl->getIdentifierNamespace() == Decl::IDNS_Ordinary) + break; + + if (ObjCCompatibleAliasDecl *ADecl = + dyn_cast_or_null<ObjCCompatibleAliasDecl>(IDecl)) + return ADecl->getClassInterface(); + return IDecl; +} + +/// getObjCInterfaceDecl - Look up a class declaration in the scope. +/// Returns 0 if no declaration is found, or if the declaration found by +/// LookupInterfaceDecl is not an ObjCInterfaceDecl. +ObjCInterfaceDecl *Sema::getObjCInterfaceDecl(IdentifierInfo *Id) { + ScopedDecl *IdDecl = LookupInterfaceDecl(Id); + // Use dyn_cast_or_null rather than cast_or_null: the lookup may return a + // decl that is not a class, and that case must yield 0 instead of tripping + // the cast assertion. + return dyn_cast_or_null<ObjCInterfaceDecl>(IdDecl); +} + +/// LookupScopedDecl - Look up the inner-most declaration in the specified +/// namespace. +ScopedDecl *Sema::LookupScopedDecl(IdentifierInfo *II, unsigned NSI, + SourceLocation IdLoc, Scope *S) { + if (II == 0) return 0; + Decl::IdentifierNamespace NS = (Decl::IdentifierNamespace)NSI; + + // Scan up the scope chain looking for a decl that matches this identifier + // that is in the appropriate namespace.  This search should not take long, as + // shadowing of names is uncommon, and deep shadowing is extremely uncommon. + for (ScopedDecl *D = II->getFETokenInfo<ScopedDecl>(); D; D = D->getNext()) + if (D->getIdentifierNamespace() == NS) + return D; + + // If we didn't find a use of this identifier, and if the identifier + // corresponds to a compiler builtin, create the decl object for the builtin + // now, injecting it into translation unit scope, and return it. + if (NS == Decl::IDNS_Ordinary) { + // If this is a builtin on this (or all) targets, create the decl.
+ if (unsigned BuiltinID = II->getBuiltinID()) + return LazilyCreateBuiltin(II, BuiltinID, S); + } + return 0; +} + +void Sema::InitBuiltinVaListType() +{ + if (!Context.getBuiltinVaListType().isNull()) + return; + + IdentifierInfo *VaIdent = &Context.Idents.get("__builtin_va_list"); + ScopedDecl *VaDecl = LookupScopedDecl(VaIdent, Decl::IDNS_Ordinary, + SourceLocation(), TUScope); + TypedefDecl *VaTypedef = cast<TypedefDecl>(VaDecl); + Context.setBuiltinVaListType(Context.getTypedefType(VaTypedef)); +} + +/// LazilyCreateBuiltin - The specified Builtin-ID was first used at file scope. +/// Lazily create a decl for it. +ScopedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned bid, + Scope *S) { + Builtin::ID BID = (Builtin::ID)bid; + + if (BID == Builtin::BI__builtin_va_start || + BID == Builtin::BI__builtin_va_copy || + BID == Builtin::BI__builtin_va_end) + InitBuiltinVaListType(); + + QualType R = Context.BuiltinInfo.GetBuiltinType(BID, Context); + FunctionDecl *New = FunctionDecl::Create(Context, SourceLocation(), II, R, + FunctionDecl::Extern, false, 0); + + // Find translation-unit scope to insert this function into. + if (Scope *FnS = S->getFnParent()) + S = FnS->getParent(); // Skip all scopes in a function at once. + while (S->getParent()) + S = S->getParent(); + S->AddDecl(New); + + // Add this decl to the end of the identifier info. + if (ScopedDecl *LastDecl = II->getFETokenInfo<ScopedDecl>()) { + // Scan until we find the last (outermost) decl in the id chain. + while (LastDecl->getNext()) + LastDecl = LastDecl->getNext(); + // Append New after it, so New becomes the last decl on the chain. + LastDecl->setNext(New); + } else { + II->setFETokenInfo(New); + } + return New; +} + +/// MergeTypeDefDecl - We just parsed a typedef 'New' which has the same name +/// and scope as a previous declaration 'Old'.  Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate.
+/// +TypedefDecl *Sema::MergeTypeDefDecl(TypedefDecl *New, ScopedDecl *OldD) { + // Verify the old decl was also a typedef. + TypedefDecl *Old = dyn_cast<TypedefDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + + // Allow multiple definitions for ObjC built-in typedefs. + // FIXME: Verify the underlying types are equivalent! + if (getLangOptions().ObjC1 && isBuiltinObjCType(New)) + return Old; + + // Redeclaration of a type is a constraint violation (6.7.2.3p1). + // Apparently GCC, Intel, and Sun all silently ignore the redeclaration if + // *either* declaration is in a system header. The code below implements + // this ad hoc compatibility rule. FIXME: The following code will not + // work properly when compiling ".i" files (containing preprocessed output). + SourceManager &SrcMgr = Context.getSourceManager(); + const FileEntry *OldDeclFile = SrcMgr.getFileEntryForLoc(Old->getLocation()); + const FileEntry *NewDeclFile = SrcMgr.getFileEntryForLoc(New->getLocation()); + HeaderSearch &HdrInfo = PP.getHeaderSearchInfo(); + DirectoryLookup::DirType OldDirType = HdrInfo.getFileDirFlavor(OldDeclFile); + DirectoryLookup::DirType NewDirType = HdrInfo.getFileDirFlavor(NewDeclFile); + + if ((OldDirType == DirectoryLookup::ExternCSystemHeaderDir || + NewDirType == DirectoryLookup::ExternCSystemHeaderDir) || + getLangOptions().Microsoft) + return New; + + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // TODO: This is totally simplistic.  It should handle merging functions + // together etc, merging extern int X; int X; ... + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; +} + +/// DeclHasAttr - Returns true if the declaration 'decl' already has an +/// attribute of the same kind as 'target'.
+static bool DeclHasAttr(const Decl *decl, const Attr *target) { + for (const Attr *attr = decl->getAttrs(); attr; attr = attr->getNext()) + if (attr->getKind() == target->getKind()) + return true; + + return false; +} + +/// MergeAttributes - Transfer attributes from the Old decl to the New one. +/// Each attribute of Old whose kind New does not already have is added to +/// New; the remaining Old attributes are deleted. +static void MergeAttributes(Decl *New, Decl *Old) { + Attr *attr = const_cast<Attr*>(Old->getAttrs()), *tmp; + +// FIXME: fix this code to clean up the Old attrs correctly (nothing below +// resets Old's own attribute list). + while (attr) { + tmp = attr; + attr = attr->getNext(); + + if (!DeclHasAttr(New, tmp)) { + New->addAttr(tmp); + } else { + tmp->setNext(0); + delete(tmp); + } + } +} + +/// MergeFunctionDecl - We just parsed a function 'New' which has the same name +/// and scope as a previous declaration 'Old'.  Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate. +/// +FunctionDecl *Sema::MergeFunctionDecl(FunctionDecl *New, ScopedDecl *OldD) { + // Verify the old decl was also a function. + FunctionDecl *Old = dyn_cast<FunctionDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + + MergeAttributes(New, Old); + + + QualType OldQType = Old->getCanonicalType(); + QualType NewQType = New->getCanonicalType(); + + // Function types need to be compatible, not identical. This handles + // duplicate function decls like "void f(int); void f(enum X);" properly. + if (Context.functionTypesAreCompatible(OldQType, NewQType)) + return New; + + // A function that has already been declared has been redeclared or defined + // with a different type- show appropriate diagnostic + diag::kind PrevDiag = Old->getBody() ? diag::err_previous_definition : + diag::err_previous_declaration; + + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // TODO: This is totally simplistic.
It should handle merging functions + // together etc, merging extern int X; int X; ... + Diag(New->getLocation(), diag::err_conflicting_types, New->getName()); + Diag(Old->getLocation(), PrevDiag); + return New; +} + +/// areEquivalentArrayTypes - Used to determine whether two array types are +/// equivalent. +/// We need to check this explicitly as an incomplete array definition is +/// considered a VariableArrayType, so will not match a complete array +/// definition that would be otherwise equivalent. +static bool areEquivalentArrayTypes(QualType NewQType, QualType OldQType) { + const ArrayType *NewAT = NewQType->getAsArrayType(); + const ArrayType *OldAT = OldQType->getAsArrayType(); + + if (!NewAT || !OldAT) + return false; + + // If either (or both) array types is incomplete we need to strip off the + // outer VariableArrayType.  Once the outer VAT is removed the remaining + // types must be identical if the array types are to be considered + // equivalent. + // eg. int[][1] and int[1][1] become + // VAT(null, CAT(1, int)) and CAT(1, CAT(1, int)) + // removing the outermost VAT gives + // CAT(1, int) and CAT(1, int) + // which are equal, therefore the array types are equivalent. + if (NewAT->isIncompleteArrayType() || OldAT->isIncompleteArrayType()) { + if (NewAT->getIndexTypeQualifier() != OldAT->getIndexTypeQualifier()) + return false; + NewQType = NewAT->getElementType().getCanonicalType(); + OldQType = OldAT->getElementType().getCanonicalType(); + } + + return NewQType == OldQType; +} + +/// MergeVarDecl - We just parsed a variable 'New' which has the same name +/// and scope as a previous declaration 'Old'.  Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate. +/// +/// FIXME: Need to carefully consider tentative definition rules (C99 6.9.2p2). +/// For example, we incorrectly complain about i1, i4 from C99 6.9.2p4.
+/// +VarDecl *Sema::MergeVarDecl(VarDecl *New, ScopedDecl *OldD) { + // Verify the old decl was also a variable. + VarDecl *Old = dyn_cast<VarDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + + MergeAttributes(New, Old); + + // Verify the types match. + if (Old->getCanonicalType() != New->getCanonicalType() && + !areEquivalentArrayTypes(New->getCanonicalType(), Old->getCanonicalType())) { + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; + } + // C99 6.2.2p4: Check if we have a non-static decl (Old) followed by a + // static one (New). + if (New->getStorageClass() == VarDecl::Static && + (Old->getStorageClass() == VarDecl::None || + Old->getStorageClass() == VarDecl::Extern)) { + Diag(New->getLocation(), diag::err_static_non_static, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; + } + // C99 6.2.2p4: Check if we have a static decl (Old) followed by a + // non-static one (New). + if (New->getStorageClass() != VarDecl::Static && + Old->getStorageClass() == VarDecl::Static) { + Diag(New->getLocation(), diag::err_non_static_static, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; + } + // We've verified the types match, now handle "tentative" definitions. + FileVarDecl *OldFSDecl = dyn_cast<FileVarDecl>(Old); + FileVarDecl *NewFSDecl = dyn_cast<FileVarDecl>(New); + + if (OldFSDecl && NewFSDecl) { + // Handle C "tentative" external object definitions (C99 6.9.2). + bool OldIsTentative = false; + bool NewIsTentative = false; + + if (!OldFSDecl->getInit() && + (OldFSDecl->getStorageClass() == VarDecl::None || + OldFSDecl->getStorageClass() == VarDecl::Static)) + OldIsTentative = true; + + // FIXME: this check doesn't work (since the initializer hasn't been + // attached yet).
This check should be moved to FinalizeDeclaratorGroup. + // Unfortunately, by the time we get to FinalizeDeclaratorGroup, we've + // thrown out the old decl. + if (!NewFSDecl->getInit() && + (NewFSDecl->getStorageClass() == VarDecl::None || + NewFSDecl->getStorageClass() == VarDecl::Static)) + ; // change to NewIsTentative = true; once the code is moved. + + if (NewIsTentative || OldIsTentative) + return New; + } + if (Old->getStorageClass() != VarDecl::Extern && + New->getStorageClass() != VarDecl::Extern) { + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + } + return New; +} + +/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with +/// no declarator (e.g. "struct foo;") is parsed. +Sema::DeclTy *Sema::ParsedFreeStandingDeclSpec(Scope *S, DeclSpec &DS) { + // TODO: emit error on 'int;' or 'const enum foo;'. + // TODO: emit error on 'typedef int;' + // if (!DS.isMissingDeclaratorOk()) Diag(...); + + return dyn_cast_or_null<TagDecl>(static_cast<Decl *>(DS.getTypeRep())); +} + +bool Sema::CheckSingleInitializer(Expr *&Init, QualType DeclType) { + // Get the type before calling CheckSingleAssignmentConstraints(), since + // it can promote the expression. + QualType InitType = Init->getType(); + + AssignConvertType ConvTy = CheckSingleAssignmentConstraints(DeclType, Init); + return DiagnoseAssignmentResult(ConvTy, Init->getLocStart(), DeclType, + InitType, Init, "initializing"); +} + +bool Sema::CheckInitExpr(Expr *expr, InitListExpr *IList, unsigned slot, + QualType ElementType) { + Expr *savExpr = expr; // Might be promoted by CheckSingleInitializer. + if (CheckSingleInitializer(expr, ElementType)) + return true; // types weren't compatible. + + if (savExpr != expr) // The type was promoted, update initializer list.
+ IList->setInit(slot, expr); + return false; +} + +bool Sema::CheckStringLiteralInit(StringLiteral *strLiteral, QualType &DeclT) { + if (const IncompleteArrayType *IAT = DeclT->getAsIncompleteArrayType()) { + // C99 6.7.8p14. We have an array of character type with unknown size + // being initialized to a string literal. + llvm::APSInt ConstVal(32); + ConstVal = strLiteral->getByteLength() + 1; + // Return a new array type (C99 6.7.8p22). + DeclT = Context.getConstantArrayType(IAT->getElementType(), ConstVal, + ArrayType::Normal, 0); + } else if (const ConstantArrayType *CAT = DeclT->getAsConstantArrayType()) { + // C99 6.7.8p14. We have an array of character type with known size. + if (strLiteral->getByteLength() > (unsigned)CAT->getMaximumElements()) + Diag(strLiteral->getSourceRange().getBegin(), + diag::warn_initializer_string_for_char_array_too_long, + strLiteral->getSourceRange()); + } else { + assert(0 && "HandleStringLiteralInit(): Invalid array type"); + } + // Set type from "char *" to "constant array of char". + strLiteral->setType(DeclT); + // For now, we always return false (meaning success). + return false; +} + +StringLiteral *Sema::IsStringLiteralInit(Expr *Init, QualType DeclType) { + const ArrayType *AT = DeclType->getAsArrayType(); + if (AT && AT->getElementType()->isCharType()) { + return dyn_cast<StringLiteral>(Init); + } + return 0; +} + +// CheckInitializerListTypes - Checks the types of elements of an initializer +// list. This function is recursive: it calls itself to initialize subelements +// of aggregate types. Note that the topLevel parameter essentially refers to +// whether this expression "owns" the initializer list passed in, or if this +// initialization is taking elements out of a parent initializer. Each +// call to this function adds zero or more to startIndex, reports any errors, +// and returns true if it found any inconsistent types. 
+bool Sema::CheckInitializerListTypes(InitListExpr*& IList, QualType &DeclType, + bool topLevel, unsigned& startIndex) { + bool hadError = false; + + if (DeclType->isScalarType()) { + // The simplest case: initializing a single scalar + if (topLevel) { + Diag(IList->getLocStart(), diag::warn_braces_around_scalar_init, + IList->getSourceRange()); + } + if (startIndex < IList->getNumInits()) { + Expr* expr = IList->getInit(startIndex); + if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) { + // FIXME: Should an error be reported here instead? + unsigned newIndex = 0; + CheckInitializerListTypes(SubInitList, DeclType, true, newIndex); + } else { + hadError |= CheckInitExpr(expr, IList, startIndex, DeclType); + } + ++startIndex; + } + // FIXME: Should an error be reported for empty initializer list + scalar? + } else if (DeclType->isVectorType()) { + if (startIndex < IList->getNumInits()) { + const VectorType *VT = DeclType->getAsVectorType(); + int maxElements = VT->getNumElements(); + QualType elementType = VT->getElementType(); + + for (int i = 0; i < maxElements; ++i) { + // Don't attempt to go past the end of the init list + if (startIndex >= IList->getNumInits()) + break; + Expr* expr = IList->getInit(startIndex); + if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) { + unsigned newIndex = 0; + hadError |= CheckInitializerListTypes(SubInitList, elementType, + true, newIndex); + ++startIndex; + } else { + hadError |= CheckInitializerListTypes(IList, elementType, + false, startIndex); + } + } + } + } else if (DeclType->isAggregateType() || DeclType->isUnionType()) { + if (DeclType->isStructureType() || DeclType->isUnionType()) { + if (startIndex < IList->getNumInits() && !topLevel && + Context.typesAreCompatible(IList->getInit(startIndex)->getType(), + DeclType)) { + // We found a compatible struct; per the standard, this initializes the + // struct. 
(The C standard technically says that this only applies for + // initializers for declarations with automatic scope; however, this + // construct is unambiguous anyway because a struct cannot contain + // a type compatible with itself. We'll output an error when we check + // if the initializer is constant.) + // FIXME: Is a call to CheckSingleInitializer required here? + ++startIndex; + } else { + RecordDecl* structDecl = DeclType->getAsRecordType()->getDecl(); + + // If the record is invalid, some of its members are invalid. To avoid + // confusion, we forgo checking the initializer for the entire record. + if (structDecl->isInvalidDecl()) + return true; + + // If structDecl is a forward declaration, this loop won't do anything; + // That's okay, because an error should get printed out elsewhere. It + // might be worthwhile to skip over the rest of the initializer, though. + int numMembers = structDecl->getNumMembers() - + structDecl->hasFlexibleArrayMember(); + for (int i = 0; i < numMembers; i++) { + // Don't attempt to go past the end of the init list + if (startIndex >= IList->getNumInits()) + break; + FieldDecl * curField = structDecl->getMember(i); + if (!curField->getIdentifier()) { + // Don't initialize unnamed fields, e.g. "int : 20;" + continue; + } + QualType fieldType = curField->getType(); + Expr* expr = IList->getInit(startIndex); + if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) { + unsigned newStart = 0; + hadError |= CheckInitializerListTypes(SubInitList, fieldType, + true, newStart); + ++startIndex; + } else { + hadError |= CheckInitializerListTypes(IList, fieldType, + false, startIndex); + } + if (DeclType->isUnionType()) + break; + } + // FIXME: Implement flexible array initialization GCC extension (it's a + // really messy extension to implement, unfortunately...the necessary + // information isn't actually even here!)
+ } + } else if (DeclType->isArrayType()) { + // Check for the special-case of initializing an array with a string. + if (startIndex < IList->getNumInits()) { + if (StringLiteral *lit = IsStringLiteralInit(IList->getInit(startIndex), + DeclType)) { + CheckStringLiteralInit(lit, DeclType); + ++startIndex; + if (topLevel && startIndex < IList->getNumInits()) { + // We have leftover initializers; diagnose them + Diag(IList->getInit(startIndex)->getLocStart(), + diag::err_excess_initializers_in_char_array_initializer, + IList->getInit(startIndex)->getSourceRange()); + } + return false; + } + } + int maxElements; + if (DeclType->isIncompleteArrayType()) { + // FIXME: use a proper constant + maxElements = 0x7FFFFFFF; + } else if (const VariableArrayType *VAT = + DeclType->getAsVariableArrayType()) { + // Check for VLAs; in standard C it would be possible to check this + // earlier, but I don't know where clang accepts VLAs (gcc accepts + // them in all sorts of strange places). + Diag(VAT->getSizeExpr()->getLocStart(), + diag::err_variable_object_no_init, + VAT->getSizeExpr()->getSourceRange()); + hadError = true; + maxElements = 0x7FFFFFFF; + } else { + const ConstantArrayType *CAT = DeclType->getAsConstantArrayType(); + maxElements = static_cast<int>(CAT->getSize().getZExtValue()); + } + QualType elementType = DeclType->getAsArrayType()->getElementType(); + int numElements = 0; + for (int i = 0; i < maxElements; ++i, ++numElements) { + // Don't attempt to go past the end of the init list + if (startIndex >= IList->getNumInits()) + break; + Expr* expr = IList->getInit(startIndex); + if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) { + unsigned newIndex = 0; + hadError |= CheckInitializerListTypes(SubInitList, elementType, + true, newIndex); + ++startIndex; + } else { + hadError |= CheckInitializerListTypes(IList, elementType, + false, startIndex); + } + } + if (DeclType->isIncompleteArrayType()) { + // If this is an incomplete array type, the actual type needs to +
// be calculated here + if (numElements == 0) { + // Sizing an array implicitly to zero is not allowed + // (It could in theory be allowed, but it doesn't really matter.) + Diag(IList->getLocStart(), + diag::err_at_least_one_initializer_needed_to_size_array); + hadError = true; + } else { + llvm::APSInt ConstVal(32); + ConstVal = numElements; + DeclType = Context.getConstantArrayType(elementType, ConstVal, + ArrayType::Normal, 0); + } + } + } else { + assert(0 && "Aggregate that isn't a function or array?!"); + } + } else { + // In C, all types are either scalars or aggregates, but + // additional handling is needed here for C++ (and possibly others?). + assert(0 && "Unsupported initializer type"); + } + + // If this init list is a base list, we set the type; an initializer doesn't + // fundamentally have a type, but this makes the ASTs a bit easier to read + if (topLevel) + IList->setType(DeclType); + + if (topLevel && startIndex < IList->getNumInits()) { + // We have leftover initializers; warn + Diag(IList->getInit(startIndex)->getLocStart(), + diag::warn_excess_initializers, + IList->getInit(startIndex)->getSourceRange()); + } + return hadError; +} + +bool Sema::CheckInitializerTypes(Expr *&Init, QualType &DeclType) { + // C99 6.7.8p3: The type of the entity to be initialized shall be an array + // of unknown size ("[]") or an object type that is not a variable array type. 
+ if (const VariableArrayType *VAT = DeclType->getAsVariableArrayType()) + return Diag(VAT->getSizeExpr()->getLocStart(), + diag::err_variable_object_no_init, + VAT->getSizeExpr()->getSourceRange()); + + InitListExpr *InitList = dyn_cast<InitListExpr>(Init); + if (!InitList) { + // FIXME: Handle wide strings + if (StringLiteral *strLiteral = IsStringLiteralInit(Init, DeclType)) + return CheckStringLiteralInit(strLiteral, DeclType); + + if (DeclType->isArrayType()) + return Diag(Init->getLocStart(), + diag::err_array_init_list_required, + Init->getSourceRange()); + + return CheckSingleInitializer(Init, DeclType); + } + unsigned newIndex = 0; + return CheckInitializerListTypes(InitList, DeclType, true, newIndex); +} + +Sema::DeclTy * +Sema::ActOnDeclarator(Scope *S, Declarator &D, DeclTy *lastDecl) { + ScopedDecl *LastDeclarator = dyn_cast_or_null<ScopedDecl>((Decl *)lastDecl); + IdentifierInfo *II = D.getIdentifier(); + + // All of these full declarators require an identifier. If it doesn't have + // one, the ParsedFreeStandingDeclSpec action should be used. + if (II == 0) { + Diag(D.getDeclSpec().getSourceRange().getBegin(), + diag::err_declarator_need_ident, + D.getDeclSpec().getSourceRange(), D.getSourceRange()); + return 0; + } + + // The scope passed in may not be a decl scope. Zip up the scope tree until + // we find one that is. + while ((S->getFlags() & Scope::DeclScope) == 0) + S = S->getParent(); + + // See if this is a redefinition of a variable in the same scope. 
+ ScopedDecl *PrevDecl = LookupScopedDecl(II, Decl::IDNS_Ordinary, + D.getIdentifierLoc(), S); + ScopedDecl *New; + bool InvalidDecl = false; + + QualType R = GetTypeForDeclarator(D, S); + assert(!R.isNull() && "GetTypeForDeclarator() returned null type"); + + if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef) { + TypedefDecl *NewTD = ParseTypedefDecl(S, D, R, LastDeclarator); + if (!NewTD) return 0; + + // Handle attributes prior to checking for duplicates in MergeVarDecl + HandleDeclAttributes(NewTD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + // Merge the decl with the existing one if appropriate. If the decl is + // in an outer scope, it isn't the same thing. + if (PrevDecl && S->isDeclScope(PrevDecl)) { + NewTD = MergeTypeDefDecl(NewTD, PrevDecl); + if (NewTD == 0) return 0; + } + New = NewTD; + if (S->getParent() == 0) { + // C99 6.7.7p2: If a typedef name specifies a variably modified type + // then it shall have block scope. + if (NewTD->getUnderlyingType()->isVariablyModifiedType()) { + // FIXME: Diagnostic needs to be fixed. 
+ Diag(D.getIdentifierLoc(), diag::err_typecheck_illegal_vla); + InvalidDecl = true; + } + } + } else if (R.getTypePtr()->isFunctionType()) { + FunctionDecl::StorageClass SC = FunctionDecl::None; + switch (D.getDeclSpec().getStorageClassSpec()) { + default: assert(0 && "Unknown storage class!"); + case DeclSpec::SCS_auto: + case DeclSpec::SCS_register: + Diag(D.getIdentifierLoc(), diag::err_typecheck_sclass_func, + R.getAsString()); + InvalidDecl = true; + break; + case DeclSpec::SCS_unspecified: SC = FunctionDecl::None; break; + case DeclSpec::SCS_extern: SC = FunctionDecl::Extern; break; + case DeclSpec::SCS_static: SC = FunctionDecl::Static; break; + case DeclSpec::SCS_private_extern: SC = FunctionDecl::PrivateExtern;break; + } + + bool isInline = D.getDeclSpec().isInlineSpecified(); + FunctionDecl *NewFD = FunctionDecl::Create(Context, D.getIdentifierLoc(), + II, R, SC, isInline, + LastDeclarator); + // Handle attributes. + HandleDeclAttributes(NewFD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + + // Merge the decl with the existing one if appropriate. Since C functions + // are in a flat namespace, make sure we consider decls in outer scopes. 
+ if (PrevDecl) { + NewFD = MergeFunctionDecl(NewFD, PrevDecl); + if (NewFD == 0) return 0; + } + New = NewFD; + } else { + if (R.getTypePtr()->isObjCInterfaceType()) { + Diag(D.getIdentifierLoc(), diag::err_statically_allocated_object, + D.getIdentifier()->getName()); + InvalidDecl = true; + } + + VarDecl *NewVD; + VarDecl::StorageClass SC; + switch (D.getDeclSpec().getStorageClassSpec()) { + default: assert(0 && "Unknown storage class!"); + case DeclSpec::SCS_unspecified: SC = VarDecl::None; break; + case DeclSpec::SCS_extern: SC = VarDecl::Extern; break; + case DeclSpec::SCS_static: SC = VarDecl::Static; break; + case DeclSpec::SCS_auto: SC = VarDecl::Auto; break; + case DeclSpec::SCS_register: SC = VarDecl::Register; break; + case DeclSpec::SCS_private_extern: SC = VarDecl::PrivateExtern; break; + } + if (S->getParent() == 0) { + // C99 6.9p2: The storage-class specifiers auto and register shall not + // appear in the declaration specifiers in an external declaration. + if (SC == VarDecl::Auto || SC == VarDecl::Register) { + Diag(D.getIdentifierLoc(), diag::err_typecheck_sclass_fscope, + R.getAsString()); + InvalidDecl = true; + } + NewVD = FileVarDecl::Create(Context, D.getIdentifierLoc(), II, R, SC, + LastDeclarator); + } else { + NewVD = BlockVarDecl::Create(Context, D.getIdentifierLoc(), II, R, SC, + LastDeclarator); + } + // Handle attributes prior to checking for duplicates in MergeVarDecl + HandleDeclAttributes(NewVD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + + // Emit an error if an address space was applied to decl with local storage. + // This includes arrays of objects with address space qualifiers, but not + // automatic variables that point to other address spaces. 
+ // ISO/IEC TR 18037 S5.1.2 + if (NewVD->hasLocalStorage()) { + QualType AutoTy = NewVD->getCanonicalType(); + if (const ArrayType *AT = AutoTy->getAsArrayType()) + AutoTy = AT->getElementType().getCanonicalType(); + if (AutoTy.getAddressSpace() != 0) { + Diag(D.getIdentifierLoc(), diag::err_as_qualified_auto_decl); + InvalidDecl = true; + } + } + // Merge the decl with the existing one if appropriate. If the decl is + // in an outer scope, it isn't the same thing. + if (PrevDecl && S->isDeclScope(PrevDecl)) { + NewVD = MergeVarDecl(NewVD, PrevDecl); + if (NewVD == 0) return 0; + } + New = NewVD; + } + + // If this has an identifier, add it to the scope stack. + if (II) { + New->setNext(II->getFETokenInfo<ScopedDecl>()); + II->setFETokenInfo(New); + S->AddDecl(New); + } + // If any semantic error occurred, mark the decl as invalid. + if (D.getInvalidType() || InvalidDecl) + New->setInvalidDecl(); + + return New; +} + +bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) { + SourceLocation loc; + // FIXME: Remove the isReference check and handle assignment to a reference. + if (!DclT->isReferenceType() && !Init->isConstantExpr(Context, &loc)) { + assert(loc.isValid() && "isConstantExpr didn't return a loc!"); + Diag(loc, diag::err_init_element_not_constant, Init->getSourceRange()); + return true; + } + return false; +} + +void Sema::AddInitializerToDecl(DeclTy *dcl, ExprTy *init) { + Decl *RealDecl = static_cast<Decl *>(dcl); + Expr *Init = static_cast<Expr *>(init); + assert(Init && "missing initializer"); + + // If there is no declaration, there was an error parsing it. Just ignore + // the initializer. + if (RealDecl == 0) { + delete Init; + return; + } + + VarDecl *VDecl = dyn_cast<VarDecl>(RealDecl); + if (!VDecl) { + Diag(dyn_cast<ScopedDecl>(RealDecl)->getLocation(), + diag::err_illegal_initializer); + RealDecl->setInvalidDecl(); + return; + } + // Get the decls type and save a reference for later, since + // CheckInitializerTypes may change it. 
+ QualType DclT = VDecl->getType(), SavT = DclT; + if (BlockVarDecl *BVD = dyn_cast<BlockVarDecl>(VDecl)) { + VarDecl::StorageClass SC = BVD->getStorageClass(); + if (SC == VarDecl::Extern) { // C99 6.7.8p5 + Diag(VDecl->getLocation(), diag::err_block_extern_cant_init); + BVD->setInvalidDecl(); + } else if (!BVD->isInvalidDecl()) { + if (CheckInitializerTypes(Init, DclT)) + BVD->setInvalidDecl(); + if (SC == VarDecl::Static) // C99 6.7.8p4. + CheckForConstantInitializer(Init, DclT); + } + } else if (FileVarDecl *FVD = dyn_cast<FileVarDecl>(VDecl)) { + if (FVD->getStorageClass() == VarDecl::Extern) + Diag(VDecl->getLocation(), diag::warn_extern_init); + if (!FVD->isInvalidDecl()) + if (CheckInitializerTypes(Init, DclT)) + FVD->setInvalidDecl(); + + // C99 6.7.8p4. All file scoped initializers need to be constant. + CheckForConstantInitializer(Init, DclT); + } + // If the type changed, it means we had an incomplete type that was + // completed by the initializer. For example: + // int ary[] = { 1, 3, 5 }; + // "ary" transitions from a VariableArrayType to a ConstantArrayType. + if (!VDecl->isInvalidDecl() && (DclT != SavT)) { + VDecl->setType(DclT); + Init->setType(DclT); + } + + // Attach the initializer to the decl. + VDecl->setInit(Init); + return; +} + +/// The declarators are chained together backwards, reverse the list. +Sema::DeclTy *Sema::FinalizeDeclaratorGroup(Scope *S, DeclTy *group) { + // Often we have single declarators, handle them quickly. + Decl *GroupDecl = static_cast<Decl*>(group); + if (GroupDecl == 0) + return 0; + + ScopedDecl *Group = dyn_cast<ScopedDecl>(GroupDecl); + ScopedDecl *NewGroup = 0; + if (Group->getNextDeclarator() == 0) + NewGroup = Group; + else { // reverse the list. + while (Group) { + ScopedDecl *Next = Group->getNextDeclarator(); + Group->setNextDeclarator(NewGroup); + NewGroup = Group; + Group = Next; + } + } + // Perform semantic analysis that depends on having fully processed both + // the declarator and initializer. 
+ for (ScopedDecl *ID = NewGroup; ID; ID = ID->getNextDeclarator()) { + VarDecl *IDecl = dyn_cast<VarDecl>(ID); + if (!IDecl) + continue; + FileVarDecl *FVD = dyn_cast<FileVarDecl>(IDecl); + BlockVarDecl *BVD = dyn_cast<BlockVarDecl>(IDecl); + QualType T = IDecl->getType(); + + // C99 6.7.5.2p2: If an identifier is declared to be an object with + // static storage duration, it shall not have a variable length array. + if ((FVD || BVD) && IDecl->getStorageClass() == VarDecl::Static) { + if (T->getAsVariableArrayType()) { + Diag(IDecl->getLocation(), diag::err_typecheck_illegal_vla); + IDecl->setInvalidDecl(); + } + } + // Block scope. C99 6.7p7: If an identifier for an object is declared with + // no linkage (C99 6.2.2p6), the type for the object shall be complete... + if (BVD && IDecl->getStorageClass() != VarDecl::Extern) { + if (T->isIncompleteType()) { + Diag(IDecl->getLocation(), diag::err_typecheck_decl_incomplete_type, + T.getAsString()); + IDecl->setInvalidDecl(); + } + } + // File scope. C99 6.9.2p2: A declaration of an identifier for an + // object that has file scope without an initializer, and without a + // storage-class specifier or with the storage-class specifier "static", + // constitutes a tentative definition. Note: A tentative definition with + // external linkage is valid (C99 6.2.2p5). + if (FVD && !FVD->getInit() && (FVD->getStorageClass() == VarDecl::Static || + FVD->getStorageClass() == VarDecl::None)) { + if (T->isIncompleteArrayType()) { + // C99 6.9.2 (p2, p5): Implicit initialization causes an incomplete + // array to be completed. Don't issue a diagnostic. + } else if (T->isIncompleteType()) { + // C99 6.9.2p3: If the declaration of an identifier for an object is + // a tentative definition and has internal linkage (C99 6.2.2p3), the + // declared type shall not be an incomplete type.
+ Diag(IDecl->getLocation(), diag::err_typecheck_decl_incomplete_type, + T.getAsString()); + IDecl->setInvalidDecl(); + } + } + } + return NewGroup; +} + +// Called from Sema::ActOnStartOfFunctionDef(). +ParmVarDecl * +Sema::ActOnParamDeclarator(struct DeclaratorChunk::ParamInfo &PI, + Scope *FnScope) { + IdentifierInfo *II = PI.Ident; + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // Can this happen for params? We already checked that they don't conflict + // among each other. Here they can only shadow globals, which is ok. + if (/*Decl *PrevDecl = */LookupScopedDecl(II, Decl::IDNS_Ordinary, + PI.IdentLoc, FnScope)) { + + } + + // FIXME: Handle storage class (auto, register). No declarator? + // TODO: Chain to previous parameter with the prevdeclarator chain? + + // Perform the default function/array conversion (C99 6.7.5.3p[7,8]). + // Doing the promotion here has a win and a loss. The win is the type for + // both Decl's and DeclRefExpr's will match (a convenient invariant for the + // code generator). The loss is the original type isn't preserved. For example: + // + // void func(int parmvardecl[5]) { // convert "int [5]" to "int *" + // int blockvardecl[5]; + // sizeof(parmvardecl); // size == 4 + // sizeof(blockvardecl); // size == 20 + // } + // + // For expressions, all implicit conversions are captured using the + // ImplicitCastExpr AST node (we have no such mechanism for Decl's). + // + // FIXME: If a source translation tool needs to see the original type, then + // we need to consider storing both types (in ParmVarDecl)...
+ // + QualType parmDeclType = QualType::getFromOpaquePtr(PI.TypeInfo); + if (const ArrayType *AT = parmDeclType->getAsArrayType()) { + // int x[restrict 4] -> int *restrict + parmDeclType = Context.getPointerType(AT->getElementType()); + parmDeclType = parmDeclType.getQualifiedType(AT->getIndexTypeQualifier()); + } else if (parmDeclType->isFunctionType()) + parmDeclType = Context.getPointerType(parmDeclType); + + ParmVarDecl *New = ParmVarDecl::Create(Context, PI.IdentLoc, II, parmDeclType, + VarDecl::None, 0); + + if (PI.InvalidType) + New->setInvalidDecl(); + + // If this has an identifier, add it to the scope stack. + if (II) { + New->setNext(II->getFETokenInfo<ScopedDecl>()); + II->setFETokenInfo(New); + FnScope->AddDecl(New); + } + + HandleDeclAttributes(New, PI.AttrList, 0); + return New; +} + +Sema::DeclTy *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D) { + assert(CurFunctionDecl == 0 && "Function parsing confused"); + assert(D.getTypeObject(0).Kind == DeclaratorChunk::Function && + "Not a function declarator!"); + DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(0).Fun; + + // Verify 6.9.1p6: 'every identifier in the identifier list shall be declared' + // for a K&R function. + if (!FTI.hasPrototype) { + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) { + if (FTI.ArgInfo[i].TypeInfo == 0) { + Diag(FTI.ArgInfo[i].IdentLoc, diag::ext_param_not_declared, + FTI.ArgInfo[i].Ident->getName()); + // Implicitly declare the argument as type 'int' for lack of a better + // type. + FTI.ArgInfo[i].TypeInfo = Context.IntTy.getAsOpaquePtr(); + } + } + + // Since this is a function definition, act as though we have information + // about the arguments. + if (FTI.NumArgs) + FTI.hasPrototype = true; + } else { + // FIXME: Diagnose arguments without names in C. + + } + + Scope *GlobalScope = FnBodyScope->getParent(); + + // See if this is a redefinition. 
+ ScopedDecl *PrevDcl = LookupScopedDecl(D.getIdentifier(), Decl::IDNS_Ordinary, + D.getIdentifierLoc(), GlobalScope); + if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(PrevDcl)) { + if (FD->getBody()) { + Diag(D.getIdentifierLoc(), diag::err_redefinition, + D.getIdentifier()->getName()); + Diag(FD->getLocation(), diag::err_previous_definition); + } + } + Decl *decl = static_cast<Decl*>(ActOnDeclarator(GlobalScope, D, 0)); + FunctionDecl *FD = cast<FunctionDecl>(decl); + CurFunctionDecl = FD; + + // Create Decl objects for each parameter, adding them to the FunctionDecl. + llvm::SmallVector<ParmVarDecl*, 16> Params; + + // Check for C99 6.7.5.3p10 - foo(void) is a non-varargs function that takes + // no arguments, not a function that takes a single void argument. + if (FTI.NumArgs == 1 && !FTI.isVariadic && FTI.ArgInfo[0].Ident == 0 && + !QualType::getFromOpaquePtr(FTI.ArgInfo[0].TypeInfo).getCVRQualifiers() && + QualType::getFromOpaquePtr(FTI.ArgInfo[0].TypeInfo)->isVoidType()) { + // empty arg list, don't push any params. + } else { + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) { + Params.push_back(ActOnParamDeclarator(D.getTypeObject(0).Fun.ArgInfo[i], + FnBodyScope)); + } + } + + FD->setParams(&Params[0], Params.size()); + + return FD; +} + +Sema::DeclTy *Sema::ActOnFinishFunctionBody(DeclTy *D, StmtTy *Body) { + Decl *dcl = static_cast<Decl *>(D); + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(dcl)) { + FD->setBody((Stmt*)Body); + assert(FD == CurFunctionDecl && "Function parsing confused"); + CurFunctionDecl = 0; + } else if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(dcl)) { + MD->setBody((Stmt*)Body); + CurMethodDecl = 0; + } + // Verify and clean out per-function state. + + // Check goto/label use. + for (llvm::DenseMap<IdentifierInfo*, LabelStmt*>::iterator + I = LabelMap.begin(), E = LabelMap.end(); I != E; ++I) { + // Verify that we have no forward references left. 
If so, there was a goto + // or address of a label taken, but no definition of it. Label fwd + // definitions are indicated with a null substmt. + if (I->second->getSubStmt() == 0) { + LabelStmt *L = I->second; + // Emit error. + Diag(L->getIdentLoc(), diag::err_undeclared_label_use, L->getName()); + + // At this point, we have gotos that use the bogus label. Stitch it into + // the function body so that they aren't leaked and that the AST is well + // formed. + if (Body) { + L->setSubStmt(new NullStmt(L->getIdentLoc())); + cast<CompoundStmt>((Stmt*)Body)->push_back(L); + } else { + // The whole function wasn't parsed correctly, just delete this. + delete L; + } + } + } + LabelMap.clear(); + + return D; +} + +/// ImplicitlyDefineFunction - An undeclared identifier was used in a function +/// call, forming a call to an implicitly defined function (per C99 6.5.1p2). +ScopedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc, + IdentifierInfo &II, Scope *S) { + if (getLangOptions().C99) // Extension in C99. + Diag(Loc, diag::ext_implicit_function_decl, II.getName()); + else // Legal in C90, but warn about it. + Diag(Loc, diag::warn_implicit_function_decl, II.getName()); + + // FIXME: handle stuff like: + // void foo() { extern float X(); } + // void bar() { X(); } <-- implicit decl for X in another scope. + + // Set a Declarator for the implicit definition: int foo(); + const char *Dummy; + DeclSpec DS; + bool Error = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, Dummy); + Error = Error; // Silence warning. + assert(!Error && "Error setting up implicit decl!"); + Declarator D(DS, Declarator::BlockContext); + D.AddTypeInfo(DeclaratorChunk::getFunction(false, false, 0, 0, Loc)); + D.SetIdentifier(&II, Loc); + + // Find translation-unit scope to insert this function into. + if (Scope *FnS = S->getFnParent()) + S = FnS->getParent(); // Skip all scopes in a function at once. 
+ while (S->getParent()) + S = S->getParent(); + + return dyn_cast<ScopedDecl>(static_cast<Decl*>(ActOnDeclarator(S, D, 0))); +} + + +TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T, + ScopedDecl *LastDeclarator) { + assert(D.getIdentifier() && "Wrong callback for declspec without declarator"); + assert(!T.isNull() && "GetTypeForDeclarator() returned null type"); + + // Scope manipulation handled by caller. + TypedefDecl *NewTD = TypedefDecl::Create(Context, D.getIdentifierLoc(), + D.getIdentifier(), + T, LastDeclarator); + if (D.getInvalidType()) + NewTD->setInvalidDecl(); + return NewTD; +} + +/// ActOnTag - This is invoked when we see 'struct foo' or 'struct {'. In the +/// former case, Name will be non-null. In the latter case, Name will be null. +/// TagType indicates what kind of tag this is. TK indicates whether this is a +/// reference/declaration/definition of a tag. +Sema::DeclTy *Sema::ActOnTag(Scope *S, unsigned TagType, TagKind TK, + SourceLocation KWLoc, IdentifierInfo *Name, + SourceLocation NameLoc, AttributeList *Attr) { + // If this is a use of an existing tag, it must have a name. + assert((Name != 0 || TK == TK_Definition) && + "Nameless record must be a definition!"); + + Decl::Kind Kind; + switch (TagType) { + default: assert(0 && "Unknown tag type!"); + case DeclSpec::TST_struct: Kind = Decl::Struct; break; + case DeclSpec::TST_union: Kind = Decl::Union; break; +//case DeclSpec::TST_class: Kind = Decl::Class; break; + case DeclSpec::TST_enum: Kind = Decl::Enum; break; + } + + // If this is a named struct, check to see if there was a previous forward + // declaration or definition. + if (TagDecl *PrevDecl = + dyn_cast_or_null<TagDecl>(LookupScopedDecl(Name, Decl::IDNS_Tag, + NameLoc, S))) { + + // If this is a use of a previous tag, or if the tag is already declared in + // the same scope (so that the definition/declaration completes or + // rementions the tag), reuse the decl.
+ if (TK == TK_Reference || S->isDeclScope(PrevDecl)) { + // Make sure that this wasn't declared as an enum and now used as a struct + // or something similar. + if (PrevDecl->getKind() != Kind) { + Diag(KWLoc, diag::err_use_with_wrong_tag, Name->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_use); + } + + // If this is a use or a forward declaration, we're good. + if (TK != TK_Definition) + return PrevDecl; + + // Diagnose attempts to redefine a tag. + if (PrevDecl->isDefinition()) { + Diag(NameLoc, diag::err_redefinition, Name->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + // If this is a redefinition, recover by making this struct be + // anonymous, which will make any later references get the previous + // definition. + Name = 0; + } else { + // Okay, this is definition of a previously declared or referenced tag. + // Move the location of the decl to be the definition site. + PrevDecl->setLocation(NameLoc); + return PrevDecl; + } + } + // If we get here, this is a definition of a new struct type in a nested + // scope, e.g. "struct foo; void bar() { struct foo; }", just create a new + // type. + } + + // If there is an identifier, use the location of the identifier as the + // location of the decl, otherwise use the location of the struct/union + // keyword. + SourceLocation Loc = NameLoc.isValid() ? NameLoc : KWLoc; + + // Otherwise, if this is the first time we've seen this tag, create the decl. + TagDecl *New; + switch (Kind) { + default: assert(0 && "Unknown tag kind!"); + case Decl::Enum: + // FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.: + // enum X { A, B, C } D; D should chain to X. + New = EnumDecl::Create(Context, Loc, Name, 0); + // If this is an undefined enum, warn. 
+ if (TK != TK_Definition) Diag(Loc, diag::ext_forward_ref_enum); + break; + case Decl::Union: + case Decl::Struct: + case Decl::Class: + // FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.: + // struct X { int A; } D; D should chain to X. + New = RecordDecl::Create(Context, Kind, Loc, Name, 0); + break; + } + + // If this has an identifier, add it to the scope stack. + if (Name) { + // The scope passed in may not be a decl scope. Zip up the scope tree until + // we find one that is. + while ((S->getFlags() & Scope::DeclScope) == 0) + S = S->getParent(); + + // Add it to the decl chain. + New->setNext(Name->getFETokenInfo<ScopedDecl>()); + Name->setFETokenInfo(New); + S->AddDecl(New); + } + + HandleDeclAttributes(New, Attr, 0); + return New; +} + +/// ActOnField - Each field of a struct/union/class is passed into this in order +/// to create a FieldDecl object for it. +Sema::DeclTy *Sema::ActOnField(Scope *S, DeclTy *TagDecl, + SourceLocation DeclStart, + Declarator &D, ExprTy *BitfieldWidth) { + IdentifierInfo *II = D.getIdentifier(); + Expr *BitWidth = (Expr*)BitfieldWidth; + SourceLocation Loc = DeclStart; + if (II) Loc = D.getIdentifierLoc(); + + // FIXME: Unnamed fields can be handled in various different ways, for + // example, unnamed unions inject all members into the struct namespace! + + + if (BitWidth) { + // TODO: Validate. + //printf("WARNING: BITFIELDS IGNORED!\n"); + + // 6.7.2.1p3 + // 6.7.2.1p4 + + } else { + // Not a bitfield. + + // validate II. + + } + + QualType T = GetTypeForDeclarator(D, S); + assert(!T.isNull() && "GetTypeForDeclarator() returned null type"); + bool InvalidDecl = false; + + // C99 6.7.2.1p8: A member of a structure or union may have any type other + // than a variably modified type. + if (T->isVariablyModifiedType()) { + // FIXME: This diagnostic needs work + Diag(Loc, diag::err_typecheck_illegal_vla, Loc); + InvalidDecl = true; + } + // FIXME: Chain fielddecls together. 
+ FieldDecl *NewFD; + + if (isa<RecordDecl>(static_cast<Decl *>(TagDecl))) + NewFD = new FieldDecl(Loc, II, T, BitWidth); + else if (isa<ObjCInterfaceDecl>(static_cast<Decl *>(TagDecl)) || + isa<ObjCImplementationDecl>(static_cast<Decl *>(TagDecl)) || + isa<ObjCCategoryDecl>(static_cast<Decl *>(TagDecl)) || + // FIXME: ivars are currently used to model properties, and + // properties can appear within a protocol. + // See corresponding FIXME in DeclObjC.h:ObjCPropertyDecl. + isa<ObjCProtocolDecl>(static_cast<Decl *>(TagDecl))) + NewFD = new ObjCIvarDecl(Loc, II, T); + else + assert(0 && "Sema::ActOnField(): Unknown TagDecl"); + + HandleDeclAttributes(NewFD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + + if (D.getInvalidType() || InvalidDecl) + NewFD->setInvalidDecl(); + return NewFD; +} + +/// TranslateIvarVisibility - Translate visibility from a token ID to an +/// AST enum value. +static ObjCIvarDecl::AccessControl +TranslateIvarVisibility(tok::ObjCKeywordKind ivarVisibility) { + switch (ivarVisibility) { + case tok::objc_private: return ObjCIvarDecl::Private; + case tok::objc_public: return ObjCIvarDecl::Public; + case tok::objc_protected: return ObjCIvarDecl::Protected; + case tok::objc_package: return ObjCIvarDecl::Package; + default: assert(false && "Unknown visitibility kind"); + } +} + +void Sema::ActOnFields(Scope* S, + SourceLocation RecLoc, DeclTy *RecDecl, + DeclTy **Fields, unsigned NumFields, + SourceLocation LBrac, SourceLocation RBrac, + tok::ObjCKeywordKind *visibility) { + Decl *EnclosingDecl = static_cast<Decl*>(RecDecl); + assert(EnclosingDecl && "missing record or interface decl"); + RecordDecl *Record = dyn_cast<RecordDecl>(EnclosingDecl); + + if (Record && Record->isDefinition()) { + // Diagnose code like: + // struct S { struct S {} X; }; + // We discover this when we complete the outer S. Reject and ignore the + // outer S. 
+ Diag(Record->getLocation(), diag::err_nested_redefinition, + Record->getKindName()); + Diag(RecLoc, diag::err_previous_definition); + Record->setInvalidDecl(); + return; + } + // Verify that all the fields are okay. + unsigned NumNamedMembers = 0; + llvm::SmallVector<FieldDecl*, 32> RecFields; + llvm::SmallSet<const IdentifierInfo*, 32> FieldIDs; + + for (unsigned i = 0; i != NumFields; ++i) { + + FieldDecl *FD = cast_or_null<FieldDecl>(static_cast<Decl*>(Fields[i])); + assert(FD && "missing field decl"); + + // Remember all fields. + RecFields.push_back(FD); + + // Get the type for the field. + Type *FDTy = FD->getType().getTypePtr(); + + // If we have visibility info, make sure the AST is set accordingly. + if (visibility) + cast<ObjCIvarDecl>(FD)->setAccessControl( + TranslateIvarVisibility(visibility[i])); + + // C99 6.7.2.1p2 - A field may not be a function type. + if (FDTy->isFunctionType()) { + Diag(FD->getLocation(), diag::err_field_declared_as_function, + FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + // C99 6.7.2.1p2 - A field may not be an incomplete type except... + if (FDTy->isIncompleteType()) { + if (!Record) { // Incomplete ivar type is always an error. + Diag(FD->getLocation(), diag::err_field_incomplete, FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + if (i != NumFields-1 || // ... that the last member ... + Record->getKind() != Decl::Struct || // ... of a structure ... + !FDTy->isArrayType()) { //... may have incomplete array type. + Diag(FD->getLocation(), diag::err_field_incomplete, FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + if (NumNamedMembers < 1) { //... must have more than one named member ...
+ Diag(FD->getLocation(), diag::err_flexible_array_empty_struct, + FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + // Okay, we have a legal flexible array member at the end of the struct. + if (Record) + Record->setHasFlexibleArrayMember(true); + } + /// C99 6.7.2.1p2 - a struct ending in a flexible array member cannot be the + /// field of another structure or the element of an array. + if (const RecordType *FDTTy = FDTy->getAsRecordType()) { + if (FDTTy->getDecl()->hasFlexibleArrayMember()) { + // If this is a member of a union, then entire union becomes "flexible". + if (Record && Record->getKind() == Decl::Union) { + Record->setHasFlexibleArrayMember(true); + } else { + // If this is a struct/class and this is not the last element, reject + // it. Note that GCC supports variable sized arrays in the middle of + // structures. + if (i != NumFields-1) { + Diag(FD->getLocation(), diag::err_variable_sized_type_in_struct, + FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + // We support flexible arrays at the end of structs in other structs + // as an extension. + Diag(FD->getLocation(), diag::ext_flexible_array_in_struct, + FD->getName()); + if (Record) + Record->setHasFlexibleArrayMember(true); + } + } + } + /// A field cannot be an Objective-c object + if (FDTy->isObjCInterfaceType()) { + Diag(FD->getLocation(), diag::err_statically_allocated_object, + FD->getName()); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + // Keep track of the number of named members. + if (IdentifierInfo *II = FD->getIdentifier()) { + // Detect duplicate member names. + if (!FieldIDs.insert(II)) { + Diag(FD->getLocation(), diag::err_duplicate_member, II->getName()); + // Find the previous decl. 
+ SourceLocation PrevLoc; + for (unsigned i = 0, e = RecFields.size(); ; ++i) { + assert(i != e && "Didn't find previous def!"); + if (RecFields[i]->getIdentifier() == II) { + PrevLoc = RecFields[i]->getLocation(); + break; + } + } + Diag(PrevLoc, diag::err_previous_definition); + FD->setInvalidDecl(); + EnclosingDecl->setInvalidDecl(); + continue; + } + ++NumNamedMembers; + } + } + + // Okay, we successfully defined 'Record'. + if (Record) { + Record->defineBody(&RecFields[0], RecFields.size()); + Consumer.HandleTagDeclDefinition(Record); + } else { + ObjCIvarDecl **ClsFields = reinterpret_cast<ObjCIvarDecl**>(&RecFields[0]); + if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(EnclosingDecl)) + ID->addInstanceVariablesToClass(ClsFields, RecFields.size(), RBrac); + else if (ObjCImplementationDecl *IMPDecl = + dyn_cast<ObjCImplementationDecl>(EnclosingDecl)) { + assert(IMPDecl && "ActOnFields - missing ObjCImplementationDecl"); + IMPDecl->ObjCAddInstanceVariablesToClassImpl(ClsFields, RecFields.size()); + CheckImplementationIvars(IMPDecl, ClsFields, RecFields.size(), RBrac); + } + } +} + +Sema::DeclTy *Sema::ActOnEnumConstant(Scope *S, DeclTy *theEnumDecl, + DeclTy *lastEnumConst, + SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation EqualLoc, ExprTy *val) { + theEnumDecl = theEnumDecl; // silence unused warning. + EnumConstantDecl *LastEnumConst = + cast_or_null<EnumConstantDecl>(static_cast<Decl*>(lastEnumConst)); + Expr *Val = static_cast<Expr*>(val); + + // The scope passed in may not be a decl scope. Zip up the scope tree until + // we find one that is. + while ((S->getFlags() & Scope::DeclScope) == 0) + S = S->getParent(); + + // Verify that there isn't already something declared with this name in this + // scope. 
+ if (ScopedDecl *PrevDecl = LookupScopedDecl(Id, Decl::IDNS_Ordinary, + IdLoc, S)) { + if (S->isDeclScope(PrevDecl)) { + if (isa<EnumConstantDecl>(PrevDecl)) + Diag(IdLoc, diag::err_redefinition_of_enumerator, Id->getName()); + else + Diag(IdLoc, diag::err_redefinition, Id->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + delete Val; + return 0; + } + } + + llvm::APSInt EnumVal(32); + QualType EltTy; + if (Val) { + // Make sure to promote the operand type to int. + UsualUnaryConversions(Val); + + // C99 6.7.2.2p2: Make sure we have an integer constant expression. + SourceLocation ExpLoc; + if (!Val->isIntegerConstantExpr(EnumVal, Context, &ExpLoc)) { + Diag(ExpLoc, diag::err_enum_value_not_integer_constant_expr, + Id->getName()); + delete Val; + Val = 0; // Just forget about it. + } else { + EltTy = Val->getType(); + } + } + + if (!Val) { + if (LastEnumConst) { + // Assign the last value + 1. + EnumVal = LastEnumConst->getInitVal(); + ++EnumVal; + + // Check for overflow on increment. + if (EnumVal < LastEnumConst->getInitVal()) + Diag(IdLoc, diag::warn_enum_value_overflow); + + EltTy = LastEnumConst->getType(); + } else { + // First value, set to zero. + EltTy = Context.IntTy; + EnumVal.zextOrTrunc(static_cast<uint32_t>(Context.getTypeSize(EltTy))); + } + } + + EnumConstantDecl *New = + EnumConstantDecl::Create(Context, IdLoc, Id, EltTy, Val, EnumVal, + LastEnumConst); + + // Register this decl in the current scope stack. + New->setNext(Id->getFETokenInfo<ScopedDecl>()); + Id->setFETokenInfo(New); + S->AddDecl(New); + return New; +} + +void Sema::ActOnEnumBody(SourceLocation EnumLoc, DeclTy *EnumDeclX, + DeclTy **Elements, unsigned NumElements) { + EnumDecl *Enum = cast<EnumDecl>(static_cast<Decl*>(EnumDeclX)); + assert(!Enum->isDefinition() && "Enum redefinitions can't reach here"); + + // TODO: If the result value doesn't fit in an int, it must be a long or long + // long value. 
ISO C does not support this, but GCC does as an extension, + // emit a warning. + unsigned IntWidth = Context.Target.getIntWidth(); + + // Verify that all the values are okay, compute the size of the values, and + // reverse the list. + unsigned NumNegativeBits = 0; + unsigned NumPositiveBits = 0; + + // Keep track of whether all elements have type int. + bool AllElementsInt = true; + + EnumConstantDecl *EltList = 0; + for (unsigned i = 0; i != NumElements; ++i) { + EnumConstantDecl *ECD = + cast_or_null<EnumConstantDecl>(static_cast<Decl*>(Elements[i])); + if (!ECD) continue; // Already issued a diagnostic. + + // If the enum value doesn't fit in an int, emit an extension warning. + const llvm::APSInt &InitVal = ECD->getInitVal(); + assert(InitVal.getBitWidth() >= IntWidth && + "Should have promoted value to int"); + if (InitVal.getBitWidth() > IntWidth) { + llvm::APSInt V(InitVal); + V.trunc(IntWidth); + V.extend(InitVal.getBitWidth()); + if (V != InitVal) + Diag(ECD->getLocation(), diag::ext_enum_value_not_int, + InitVal.toString()); + } + + // Keep track of the size of positive and negative values. + if (InitVal.isUnsigned() || InitVal.isNonNegative()) + NumPositiveBits = std::max(NumPositiveBits, + (unsigned)InitVal.getActiveBits()); + else + NumNegativeBits = std::max(NumNegativeBits, + (unsigned)InitVal.getMinSignedBits()); + + // Keep track of whether every enum element has type int (very common). + if (AllElementsInt) + AllElementsInt = ECD->getType() == Context.IntTy; + + ECD->setNextDeclarator(EltList); + EltList = ECD; + } + + // Figure out the type that should be used for this enum. + // FIXME: Support attribute(packed) on enums and -fshort-enums. + QualType BestType; + unsigned BestWidth; + + if (NumNegativeBits) { + // If there is a negative value, figure out the smallest integer type (of + // int/long/longlong) that fits. 
+ if (NumNegativeBits <= IntWidth && NumPositiveBits < IntWidth) { + BestType = Context.IntTy; + BestWidth = IntWidth; + } else { + BestWidth = Context.Target.getLongWidth(); + + if (NumNegativeBits <= BestWidth && NumPositiveBits < BestWidth) + BestType = Context.LongTy; + else { + BestWidth = Context.Target.getLongLongWidth(); + + if (NumNegativeBits > BestWidth || NumPositiveBits >= BestWidth) + Diag(Enum->getLocation(), diag::warn_enum_too_large); + BestType = Context.LongLongTy; + } + } + } else { + // If there is no negative value, figure out which of uint, ulong, ulonglong + // fits. + if (NumPositiveBits <= IntWidth) { + BestType = Context.UnsignedIntTy; + BestWidth = IntWidth; + } else if (NumPositiveBits <= + (BestWidth = Context.Target.getLongWidth())) { + BestType = Context.UnsignedLongTy; + } else { + BestWidth = Context.Target.getLongLongWidth(); + assert(NumPositiveBits <= BestWidth && + "How could an initializer get larger than ULL?"); + BestType = Context.UnsignedLongLongTy; + } + } + + // Loop over all of the enumerator constants, changing their types to match + // the type of the enum if needed. + for (unsigned i = 0; i != NumElements; ++i) { + EnumConstantDecl *ECD = + cast_or_null<EnumConstantDecl>(static_cast<Decl*>(Elements[i])); + if (!ECD) continue; // Already issued a diagnostic. + + // Standard C says the enumerators have int type, but we allow, as an + // extension, the enumerators to be larger than int size. If each + // enumerator value fits in an int, type it as an int, otherwise type it the + // same as the enumerator decl itself. This means that in "enum { X = 1U }" + // that X has type 'int', not 'unsigned'. + if (ECD->getType() == Context.IntTy) { + // Make sure the init value is signed. + llvm::APSInt IV = ECD->getInitVal(); + IV.setIsSigned(true); + ECD->setInitVal(IV); + continue; // Already int type. + } + + // Determine whether the value fits into an int. 
+ llvm::APSInt InitVal = ECD->getInitVal(); + bool FitsInInt; + if (InitVal.isUnsigned() || !InitVal.isNegative()) + FitsInInt = InitVal.getActiveBits() < IntWidth; + else + FitsInInt = InitVal.getMinSignedBits() <= IntWidth; + + // If it fits into an integer type, force it. Otherwise force it to match + // the enum decl type. + QualType NewTy; + unsigned NewWidth; + bool NewSign; + if (FitsInInt) { + NewTy = Context.IntTy; + NewWidth = IntWidth; + NewSign = true; + } else if (ECD->getType() == BestType) { + // Already the right type! + continue; + } else { + NewTy = BestType; + NewWidth = BestWidth; + NewSign = BestType->isSignedIntegerType(); + } + + // Adjust the APSInt value. + InitVal.extOrTrunc(NewWidth); + InitVal.setIsSigned(NewSign); + ECD->setInitVal(InitVal); + + // Adjust the Expr initializer and type. + ECD->setInitExpr(new ImplicitCastExpr(NewTy, ECD->getInitExpr())); + ECD->setType(NewTy); + } + + Enum->defineElements(EltList, BestType); + Consumer.HandleTagDeclDefinition(Enum); +} + +Sema::DeclTy *Sema::ActOnFileScopeAsmDecl(SourceLocation Loc, + ExprTy *expr) { + StringLiteral *AsmString = cast<StringLiteral>((Expr*)expr); + + return new FileScopeAsmDecl(Loc, AsmString); +} + +Sema::DeclTy* Sema::ActOnLinkageSpec(SourceLocation Loc, + SourceLocation LBrace, + SourceLocation RBrace, + const char *Lang, + unsigned StrSize, + DeclTy *D) { + LinkageSpecDecl::LanguageIDs Language; + Decl *dcl = static_cast<Decl *>(D); + if (strncmp(Lang, "\"C\"", StrSize) == 0) + Language = LinkageSpecDecl::lang_c; + else if (strncmp(Lang, "\"C++\"", StrSize) == 0) + Language = LinkageSpecDecl::lang_cxx; + else { + Diag(Loc, diag::err_bad_language); + return 0; + } + + // FIXME: Add all the various semantics of linkage specifications + return new LinkageSpecDecl(Loc, Language, dcl); +} + +void Sema::HandleDeclAttribute(Decl *New, AttributeList *Attr) { + + switch (Attr->getKind()) { + case AttributeList::AT_vector_size: + if (ValueDecl *vDecl = dyn_cast<ValueDecl>(New)) 
{ + QualType newType = HandleVectorTypeAttribute(vDecl->getType(), Attr); + if (!newType.isNull()) // install the new vector type into the decl + vDecl->setType(newType); + } + if (TypedefDecl *tDecl = dyn_cast<TypedefDecl>(New)) { + QualType newType = HandleVectorTypeAttribute(tDecl->getUnderlyingType(), + Attr); + if (!newType.isNull()) // install the new vector type into the decl + tDecl->setUnderlyingType(newType); + } + break; + case AttributeList::AT_ocu_vector_type: + if (TypedefDecl *tDecl = dyn_cast<TypedefDecl>(New)) + HandleOCUVectorTypeAttribute(tDecl, Attr); + else + Diag(Attr->getLoc(), + diag::err_typecheck_ocu_vector_not_typedef); + break; + case AttributeList::AT_address_space: + if (TypedefDecl *tDecl = dyn_cast<TypedefDecl>(New)) { + QualType newType = HandleAddressSpaceTypeAttribute( + tDecl->getUnderlyingType(), + Attr); + tDecl->setUnderlyingType(newType); + } else if (ValueDecl *vDecl = dyn_cast<ValueDecl>(New)) { + QualType newType = HandleAddressSpaceTypeAttribute(vDecl->getType(), + Attr); + // install the new addr spaced type into the decl + vDecl->setType(newType); + } + break; + case AttributeList::AT_deprecated: + HandleDeprecatedAttribute(New, Attr); + break; + case AttributeList::AT_visibility: + HandleVisibilityAttribute(New, Attr); + break; + case AttributeList::AT_weak: + HandleWeakAttribute(New, Attr); + break; + case AttributeList::AT_dllimport: + HandleDLLImportAttribute(New, Attr); + break; + case AttributeList::AT_dllexport: + HandleDLLExportAttribute(New, Attr); + break; + case AttributeList::AT_nothrow: + HandleNothrowAttribute(New, Attr); + break; + case AttributeList::AT_stdcall: + HandleStdCallAttribute(New, Attr); + break; + case AttributeList::AT_fastcall: + HandleFastCallAttribute(New, Attr); + break; + case AttributeList::AT_aligned: + HandleAlignedAttribute(New, Attr); + break; + case AttributeList::AT_packed: + HandlePackedAttribute(New, Attr); + break; + case AttributeList::AT_annotate: + 
HandleAnnotateAttribute(New, Attr); + break; + case AttributeList::AT_noreturn: + HandleNoReturnAttribute(New, Attr); + break; + case AttributeList::AT_format: + HandleFormatAttribute(New, Attr); + break; + default: +#if 0 + // TODO: when we have the full set of attributes, warn about unknown ones. + Diag(Attr->getLoc(), diag::warn_attribute_ignored, + Attr->getName()->getName()); +#endif + break; + } +} + +void Sema::HandleDeclAttributes(Decl *New, AttributeList *declspec_prefix, + AttributeList *declarator_postfix) { + while (declspec_prefix) { + HandleDeclAttribute(New, declspec_prefix); + declspec_prefix = declspec_prefix->getNext(); + } + while (declarator_postfix) { + HandleDeclAttribute(New, declarator_postfix); + declarator_postfix = declarator_postfix->getNext(); + } +} + +void Sema::HandleOCUVectorTypeAttribute(TypedefDecl *tDecl, + AttributeList *rawAttr) { + QualType curType = tDecl->getUnderlyingType(); + // check the attribute arguments. + if (rawAttr->getNumArgs() != 1) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return; + } + Expr *sizeExpr = static_cast<Expr *>(rawAttr->getArg(0)); + llvm::APSInt vecSize(32); + if (!sizeExpr->isIntegerConstantExpr(vecSize, Context)) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_not_int, + "ocu_vector_type", sizeExpr->getSourceRange()); + return; + } + // unlike gcc's vector_size attribute, we do not allow vectors to be defined + // in conjunction with complex types (pointers, arrays, functions, etc.). + Type *canonType = curType.getCanonicalType().getTypePtr(); + if (!(canonType->isIntegerType() || canonType->isRealFloatingType())) { + Diag(rawAttr->getLoc(), diag::err_attribute_invalid_vector_type, + curType.getCanonicalType().getAsString()); + return; + } + // unlike gcc's vector_size attribute, the size is specified as the + // number of elements, not the number of bytes. 
+ unsigned vectorSize = static_cast<unsigned>(vecSize.getZExtValue()); + + if (vectorSize == 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_zero_size, + sizeExpr->getSourceRange()); + return; + } + // Instantiate/Install the vector type, the number of elements is > 0. + tDecl->setUnderlyingType(Context.getOCUVectorType(curType, vectorSize)); + // Remember this typedef decl, we will need it later for diagnostics. + OCUVectorDecls.push_back(tDecl); +} + +QualType Sema::HandleVectorTypeAttribute(QualType curType, + AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 1) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return QualType(); + } + Expr *sizeExpr = static_cast<Expr *>(rawAttr->getArg(0)); + llvm::APSInt vecSize(32); + if (!sizeExpr->isIntegerConstantExpr(vecSize, Context)) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_not_int, + "vector_size", sizeExpr->getSourceRange()); + return QualType(); + } + // navigate to the base type - we need to provide for vector pointers, + // vector arrays, and functions returning vectors. + Type *canonType = curType.getCanonicalType().getTypePtr(); + + if (canonType->isPointerType() || canonType->isArrayType() || + canonType->isFunctionType()) { + assert(0 && "HandleVector(): Complex type construction unimplemented"); + /* FIXME: rebuild the type from the inside out, vectorizing the inner type. + do { + if (PointerType *PT = dyn_cast<PointerType>(canonType)) + canonType = PT->getPointeeType().getTypePtr(); + else if (ArrayType *AT = dyn_cast<ArrayType>(canonType)) + canonType = AT->getElementType().getTypePtr(); + else if (FunctionType *FT = dyn_cast<FunctionType>(canonType)) + canonType = FT->getResultType().getTypePtr(); + } while (canonType->isPointerType() || canonType->isArrayType() || + canonType->isFunctionType()); + */ + } + // the base type must be integer or float. 
+ if (!(canonType->isIntegerType() || canonType->isRealFloatingType())) { + Diag(rawAttr->getLoc(), diag::err_attribute_invalid_vector_type, + curType.getCanonicalType().getAsString()); + return QualType(); + } + unsigned typeSize = static_cast<unsigned>(Context.getTypeSize(curType)); + // vecSize is specified in bytes - convert to bits. + unsigned vectorSize = static_cast<unsigned>(vecSize.getZExtValue() * 8); + + // the vector size needs to be an integral multiple of the type size. + if (vectorSize % typeSize) { + Diag(rawAttr->getLoc(), diag::err_attribute_invalid_size, + sizeExpr->getSourceRange()); + return QualType(); + } + if (vectorSize == 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_zero_size, + sizeExpr->getSourceRange()); + return QualType(); + } + // Instantiate the vector type, the number of elements is > 0, and not + // required to be a power of 2, unlike GCC. + return Context.getVectorType(curType, vectorSize/typeSize); +} + +void Sema::HandlePackedAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() > 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + if (TagDecl *TD = dyn_cast<TagDecl>(d)) + TD->addAttr(new PackedAttr); + else if (FieldDecl *FD = dyn_cast<FieldDecl>(d)) { + // If the alignment is less than or equal to 8 bits, the packed attribute + // has no effect. + if (Context.getTypeAlign(FD->getType()) <= 8) + Diag(rawAttr->getLoc(), + diag::warn_attribute_ignored_for_field_of_type, + rawAttr->getName()->getName(), FD->getType().getAsString()); + else + FD->addAttr(new PackedAttr); + } else + Diag(rawAttr->getLoc(), diag::warn_attribute_ignored, + rawAttr->getName()->getName()); +} + +void Sema::HandleNoReturnAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. 
+ if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + FunctionDecl *Fn = dyn_cast<FunctionDecl>(d); + + if (!Fn) { + Diag(rawAttr->getLoc(), diag::warn_attribute_wrong_decl_type, + "noreturn", "function"); + return; + } + + d->addAttr(new NoReturnAttr()); +} + +void Sema::HandleDeprecatedAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new DeprecatedAttr()); +} + +void Sema::HandleVisibilityAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 1) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return; + } + + Expr *Arg = static_cast<Expr*>(rawAttr->getArg(0)); + Arg = Arg->IgnoreParenCasts(); + StringLiteral *Str = dyn_cast<StringLiteral>(Arg); + + if (Str == 0 || Str->isWide()) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_n_not_string, + "visibility", std::string("1")); + return; + } + + const char *TypeStr = Str->getStrData(); + unsigned TypeLen = Str->getByteLength(); + llvm::GlobalValue::VisibilityTypes type; + + if (TypeLen == 7 && !memcmp(TypeStr, "default", 7)) + type = llvm::GlobalValue::DefaultVisibility; + else if (TypeLen == 6 && !memcmp(TypeStr, "hidden", 6)) + type = llvm::GlobalValue::HiddenVisibility; + else if (TypeLen == 8 && !memcmp(TypeStr, "internal", 8)) + type = llvm::GlobalValue::HiddenVisibility; // FIXME + else if (TypeLen == 9 && !memcmp(TypeStr, "protected", 9)) + type = llvm::GlobalValue::ProtectedVisibility; + else { + Diag(rawAttr->getLoc(), diag::warn_attribute_type_not_supported, + "visibility", TypeStr); + return; + } + + d->addAttr(new VisibilityAttr(type)); +} + +void Sema::HandleWeakAttribute(Decl *d, AttributeList *rawAttr) { + // check the 
attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new WeakAttr()); +} + +void Sema::HandleDLLImportAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new DLLImportAttr()); +} + +void Sema::HandleDLLExportAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new DLLExportAttr()); +} + +void Sema::HandleStdCallAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new StdCallAttr()); +} + +void Sema::HandleFastCallAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new FastCallAttr()); +} + +void Sema::HandleNothrowAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. 
+ if (rawAttr->getNumArgs() != 0) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("0")); + return; + } + + d->addAttr(new NoThrowAttr()); +} + +/// Handle __attribute__((format(type,idx,firstarg))) attributes +/// based on http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html +void Sema::HandleFormatAttribute(Decl *d, AttributeList *rawAttr) { + + if (!rawAttr->getParameterName()) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_n_not_string, + "format", std::string("1")); + return; + } + + if (rawAttr->getNumArgs() != 2) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("3")); + return; + } + + FunctionDecl *Fn = dyn_cast<FunctionDecl>(d); + if (!Fn) { + Diag(rawAttr->getLoc(), diag::warn_attribute_wrong_decl_type, + "format", "function"); + return; + } + + const FunctionTypeProto *proto = + dyn_cast<FunctionTypeProto>(Fn->getType()->getAsFunctionType()); + if (!proto) + return; + + // FIXME: in C++ the implicit 'this' function parameter also counts. + // this is needed in order to be compatible with GCC + // the index must start in 1 and the limit is numargs+1 + unsigned NumArgs = Fn->getNumParams(); + unsigned FirstIdx = 1; + + const char *Format = rawAttr->getParameterName()->getName(); + unsigned FormatLen = rawAttr->getParameterName()->getLength(); + + // Normalize the argument, __foo__ becomes foo. 
+ if (FormatLen > 4 && Format[0] == '_' && Format[1] == '_' && + Format[FormatLen - 2] == '_' && Format[FormatLen - 1] == '_') { + Format += 2; + FormatLen -= 4; + } + + if (!((FormatLen == 5 && !memcmp(Format, "scanf", 5)) + || (FormatLen == 6 && !memcmp(Format, "printf", 6)) + || (FormatLen == 7 && !memcmp(Format, "strfmon", 7)) + || (FormatLen == 8 && !memcmp(Format, "strftime", 8)))) { + Diag(rawAttr->getLoc(), diag::warn_attribute_type_not_supported, + "format", rawAttr->getParameterName()->getName()); + return; + } + + // checks for the 2nd argument + Expr *IdxExpr = static_cast<Expr *>(rawAttr->getArg(0)); + llvm::APSInt Idx(Context.getTypeSize(IdxExpr->getType())); + if (!IdxExpr->isIntegerConstantExpr(Idx, Context)) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_n_not_int, + "format", std::string("2"), IdxExpr->getSourceRange()); + return; + } + + if (Idx.getZExtValue() < FirstIdx || Idx.getZExtValue() > NumArgs) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_out_of_bounds, + "format", std::string("2"), IdxExpr->getSourceRange()); + return; + } + + // make sure the format string is really a string + QualType Ty = proto->getArgType(Idx.getZExtValue()-1); + if (!Ty->isPointerType() || + !Ty->getAsPointerType()->getPointeeType()->isCharType()) { + Diag(rawAttr->getLoc(), diag::err_format_attribute_not_string, + IdxExpr->getSourceRange()); + return; + } + + + // check the 3rd argument + Expr *FirstArgExpr = static_cast<Expr *>(rawAttr->getArg(1)); + llvm::APSInt FirstArg(Context.getTypeSize(FirstArgExpr->getType())); + if (!FirstArgExpr->isIntegerConstantExpr(FirstArg, Context)) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_n_not_int, + "format", std::string("3"), FirstArgExpr->getSourceRange()); + return; + } + + // check if the function is variadic if the 3rd argument non-zero + if (FirstArg != 0) { + if (proto->isVariadic()) { + ++NumArgs; // +1 for ... 
+ } else { + Diag(d->getLocation(), diag::err_format_attribute_requires_variadic); + return; + } + } + + // strftime requires FirstArg to be 0 because it doesn't read from any variable + // the input is just the current time + the format string + if (FormatLen == 8 && !memcmp(Format, "strftime", 8)) { + if (FirstArg != 0) { + Diag(rawAttr->getLoc(), diag::err_format_strftime_third_parameter, + FirstArgExpr->getSourceRange()); + return; + } + // if 0 it disables parameter checking (to use with e.g. va_list) + } else if (FirstArg != 0 && FirstArg != NumArgs) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_out_of_bounds, + "format", std::string("3"), FirstArgExpr->getSourceRange()); + return; + } + + d->addAttr(new FormatAttr(std::string(Format, FormatLen), + Idx.getZExtValue(), FirstArg.getZExtValue())); +} + +void Sema::HandleAnnotateAttribute(Decl *d, AttributeList *rawAttr) { + // check the attribute arguments. + if (rawAttr->getNumArgs() != 1) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return; + } + Expr *argExpr = static_cast<Expr *>(rawAttr->getArg(0)); + StringLiteral *SE = dyn_cast<StringLiteral>(argExpr); + + // Make sure that there is a string literal as the annotation's single + // argument. + if (!SE) { + Diag(rawAttr->getLoc(), diag::err_attribute_annotate_no_string); + return; + } + d->addAttr(new AnnotateAttr(std::string(SE->getStrData(), + SE->getByteLength()))); +} + +void Sema::HandleAlignedAttribute(Decl *d, AttributeList *rawAttr) +{ + // check the attribute arguments. + if (rawAttr->getNumArgs() > 1) { + Diag(rawAttr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return; + } + + unsigned Align = 0; + + if (rawAttr->getNumArgs() == 0) { + // FIXME: This should be the target specific maximum alignment. + // (For now we just use 128 bits which is the maximum on X86. 
+ Align = 128; + return; + } else { + Expr *alignmentExpr = static_cast<Expr *>(rawAttr->getArg(0)); + llvm::APSInt alignment(32); + if (!alignmentExpr->isIntegerConstantExpr(alignment, Context)) { + Diag(rawAttr->getLoc(), diag::err_attribute_argument_not_int, + "aligned", alignmentExpr->getSourceRange()); + return; + } + + Align = alignment.getZExtValue() * 8; + } + + d->addAttr(new AlignedAttr(Align)); +} diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp new file mode 100644 index 00000000000..20d127b31eb --- /dev/null +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -0,0 +1,927 @@ +//===--- SemaDeclObjC.cpp - Semantic Analysis for ObjC Declarations -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for Objective C declarations. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Parse/Scope.h" + +using namespace clang; + +/// ObjCActOnStartOfMethodDef - This routine sets up parameters; invisible +/// and user declared, in the method definition's AST. +void Sema::ObjCActOnStartOfMethodDef(Scope *FnBodyScope, DeclTy *D) { + assert(CurFunctionDecl == 0 && "Method parsing confused"); + ObjCMethodDecl *MDecl = dyn_cast<ObjCMethodDecl>(static_cast<Decl *>(D)); + assert(MDecl != 0 && "Not a method declarator!"); + + // Allow the rest of sema to find private method decl implementations. + if (MDecl->isInstance()) + AddInstanceMethodToGlobalPool(MDecl); + else + AddFactoryMethodToGlobalPool(MDecl); + + // Allow all of Sema to see that we are entering a method definition. 
+ CurMethodDecl = MDecl; + + // Create Decl objects for each parameter, entering them in the scope for + // binding to their use. + struct DeclaratorChunk::ParamInfo PI; + + // Insert the invisible arguments, self and _cmd! + PI.Ident = &Context.Idents.get("self"); + PI.IdentLoc = SourceLocation(); // synthesized vars have a null location. + PI.InvalidType = false; + PI.AttrList = 0; + PI.TypeInfo = Context.getObjCIdType().getAsOpaquePtr(); + + if (MDecl->isInstance()) { + if (ObjCInterfaceDecl *OID = MDecl->getClassInterface()) { + // There may be no interface context due to error in declaration of the + // interface (which has been reported). Recover gracefully + QualType selfTy = Context.getObjCInterfaceType(OID); + selfTy = Context.getPointerType(selfTy); + PI.TypeInfo = selfTy.getAsOpaquePtr(); + } + } + + CurMethodDecl->setSelfDecl(ActOnParamDeclarator(PI, FnBodyScope)); + + PI.Ident = &Context.Idents.get("_cmd"); + PI.TypeInfo = Context.getObjCSelType().getAsOpaquePtr(); + ActOnParamDeclarator(PI, FnBodyScope); + + for (int i = 0; i < MDecl->getNumParams(); i++) { + ParmVarDecl *PDecl = MDecl->getParamDecl(i); + PI.Ident = PDecl->getIdentifier(); + PI.IdentLoc = PDecl->getLocation(); // user vars have a real location. + PI.TypeInfo = PDecl->getType().getAsOpaquePtr(); + MDecl->setParamDecl(i, ActOnParamDeclarator(PI, FnBodyScope)); + } +} + +Sema::DeclTy *Sema::ActOnStartClassInterface( + SourceLocation AtInterfaceLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *SuperName, SourceLocation SuperLoc, + IdentifierInfo **ProtocolNames, unsigned NumProtocols, + SourceLocation EndProtoLoc, AttributeList *AttrList) { + assert(ClassName && "Missing class identifier"); + + // Check for another declaration kind with the same name. 
+ ScopedDecl *PrevDecl = LookupInterfaceDecl(ClassName); + if (PrevDecl && !isa<ObjCInterfaceDecl>(PrevDecl)) { + Diag(ClassLoc, diag::err_redefinition_different_kind, + ClassName->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + } + + ObjCInterfaceDecl* IDecl = dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl); + if (IDecl) { + // Class already seen. Is it a forward declaration? + if (!IDecl->isForwardDecl()) + Diag(AtInterfaceLoc, diag::err_duplicate_class_def, IDecl->getName()); + else { + IDecl->setLocation(AtInterfaceLoc); + IDecl->setForwardDecl(false); + IDecl->AllocIntfRefProtocols(NumProtocols); + } + } + else { + IDecl = new ObjCInterfaceDecl(AtInterfaceLoc, NumProtocols, ClassName); + + // Chain & install the interface decl into the identifier. + IDecl->setNext(ClassName->getFETokenInfo<ScopedDecl>()); + ClassName->setFETokenInfo(IDecl); + + // Remember that this needs to be removed when the scope is popped. + TUScope->AddDecl(IDecl); + } + + if (SuperName) { + ObjCInterfaceDecl* SuperClassEntry = 0; + // Check if a different kind of symbol declared in this scope. + PrevDecl = LookupInterfaceDecl(SuperName); + if (PrevDecl && !isa<ObjCInterfaceDecl>(PrevDecl)) { + Diag(SuperLoc, diag::err_redefinition_different_kind, + SuperName->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + } + else { + // Check that super class is previously defined + SuperClassEntry = dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl); + + if (!SuperClassEntry || SuperClassEntry->isForwardDecl()) { + Diag(AtInterfaceLoc, diag::err_undef_superclass, + SuperClassEntry ? SuperClassEntry->getName() + : SuperName->getName(), + ClassName->getName()); + } + } + IDecl->setSuperClass(SuperClassEntry); + IDecl->setLocEnd(SuperLoc); + } else { // we have a root class. 
+ IDecl->setLocEnd(ClassLoc); + } + + /// Check then save referenced protocols + if (NumProtocols) { + for (unsigned int i = 0; i != NumProtocols; i++) { + ObjCProtocolDecl* RefPDecl = ObjCProtocols[ProtocolNames[i]]; + if (!RefPDecl || RefPDecl->isForwardDecl()) + Diag(ClassLoc, diag::warn_undef_protocolref, + ProtocolNames[i]->getName(), + ClassName->getName()); + IDecl->setIntfRefProtocols(i, RefPDecl); + } + IDecl->setLocEnd(EndProtoLoc); + } + return IDecl; +} + +/// ActOnCompatiblityAlias - this action is called after complete parsing of +/// @compatibility_alias declaration. It sets up the alias relationships. +Sema::DeclTy *Sema::ActOnCompatiblityAlias( + SourceLocation AtCompatibilityAliasLoc, + IdentifierInfo *AliasName, SourceLocation AliasLocation, + IdentifierInfo *ClassName, SourceLocation ClassLocation) { + // Look for previous declaration of alias name + ScopedDecl *ADecl = LookupScopedDecl(AliasName, Decl::IDNS_Ordinary, + AliasLocation, TUScope); + if (ADecl) { + if (isa<ObjCCompatibleAliasDecl>(ADecl)) { + Diag(AliasLocation, diag::warn_previous_alias_decl); + Diag(ADecl->getLocation(), diag::warn_previous_declaration); + } + else { + Diag(AliasLocation, diag::err_conflicting_aliasing_type, + AliasName->getName()); + Diag(ADecl->getLocation(), diag::err_previous_declaration); + } + return 0; + } + // Check for class declaration + ScopedDecl *CDecl = LookupScopedDecl(ClassName, Decl::IDNS_Ordinary, + ClassLocation, TUScope); + if (!CDecl || !isa<ObjCInterfaceDecl>(CDecl)) { + Diag(ClassLocation, diag::warn_undef_interface, + ClassName->getName()); + if (CDecl) + Diag(CDecl->getLocation(), diag::warn_previous_declaration); + return 0; + } + // Everything checked out, instantiate a new alias declaration ast + ObjCCompatibleAliasDecl *AliasDecl = + new ObjCCompatibleAliasDecl(AtCompatibilityAliasLoc, + AliasName, + dyn_cast<ObjCInterfaceDecl>(CDecl)); + + // Chain & install the interface decl into the identifier. 
+ AliasDecl->setNext(AliasName->getFETokenInfo<ScopedDecl>()); + AliasName->setFETokenInfo(AliasDecl); + return AliasDecl; +} + +Sema::DeclTy *Sema::ActOnStartProtocolInterface( + SourceLocation AtProtoInterfaceLoc, + IdentifierInfo *ProtocolName, SourceLocation ProtocolLoc, + IdentifierInfo **ProtoRefNames, unsigned NumProtoRefs, + SourceLocation EndProtoLoc) { + assert(ProtocolName && "Missing protocol identifier"); + ObjCProtocolDecl *PDecl = ObjCProtocols[ProtocolName]; + if (PDecl) { + // Protocol already seen. Better be a forward protocol declaration + if (!PDecl->isForwardDecl()) + Diag(ProtocolLoc, diag::err_duplicate_protocol_def, + ProtocolName->getName()); + else { + PDecl->setForwardDecl(false); + PDecl->AllocReferencedProtocols(NumProtoRefs); + } + } + else { + PDecl = new ObjCProtocolDecl(AtProtoInterfaceLoc, NumProtoRefs, + ProtocolName); + ObjCProtocols[ProtocolName] = PDecl; + } + + if (NumProtoRefs) { + /// Check then save referenced protocols + for (unsigned int i = 0; i != NumProtoRefs; i++) { + ObjCProtocolDecl* RefPDecl = ObjCProtocols[ProtoRefNames[i]]; + if (!RefPDecl || RefPDecl->isForwardDecl()) + Diag(ProtocolLoc, diag::warn_undef_protocolref, + ProtoRefNames[i]->getName(), + ProtocolName->getName()); + PDecl->setReferencedProtocols(i, RefPDecl); + } + PDecl->setLocEnd(EndProtoLoc); + } + return PDecl; +} + +/// FindProtocolDeclaration - This routine looks up protocols and +/// issues an error if they are not declared. It returns the list of protocol +/// declarations in its 'Protocols' argument. 
+void +Sema::FindProtocolDeclaration(SourceLocation TypeLoc, + IdentifierInfo **ProtocolId, + unsigned NumProtocols, + llvm::SmallVector<DeclTy *,8> &Protocols) { + for (unsigned i = 0; i != NumProtocols; ++i) { + ObjCProtocolDecl *PDecl = ObjCProtocols[ProtocolId[i]]; + if (!PDecl) + Diag(TypeLoc, diag::err_undeclared_protocol, + ProtocolId[i]->getName()); + else + Protocols.push_back(PDecl); + } +} + +/// ActOnForwardProtocolDeclaration - +Action::DeclTy * +Sema::ActOnForwardProtocolDeclaration(SourceLocation AtProtocolLoc, + IdentifierInfo **IdentList, unsigned NumElts) { + llvm::SmallVector<ObjCProtocolDecl*, 32> Protocols; + + for (unsigned i = 0; i != NumElts; ++i) { + IdentifierInfo *P = IdentList[i]; + ObjCProtocolDecl *PDecl = ObjCProtocols[P]; + if (!PDecl) { // Not already seen? + // FIXME: Pass in the location of the identifier! + PDecl = new ObjCProtocolDecl(AtProtocolLoc, 0, P, true); + ObjCProtocols[P] = PDecl; + } + + Protocols.push_back(PDecl); + } + return new ObjCForwardProtocolDecl(AtProtocolLoc, + &Protocols[0], Protocols.size()); +} + +Sema::DeclTy *Sema::ActOnStartCategoryInterface( + SourceLocation AtInterfaceLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *CategoryName, SourceLocation CategoryLoc, + IdentifierInfo **ProtoRefNames, unsigned NumProtoRefs, + SourceLocation EndProtoLoc) { + ObjCInterfaceDecl *IDecl = getObjCInterfaceDecl(ClassName); + + ObjCCategoryDecl *CDecl = new ObjCCategoryDecl(AtInterfaceLoc, NumProtoRefs, + CategoryName); + CDecl->setClassInterface(IDecl); + + /// Check that class of this category is already completely declared. 
+ if (!IDecl || IDecl->isForwardDecl()) + Diag(ClassLoc, diag::err_undef_interface, ClassName->getName()); + else { + /// Check for duplicate interface declaration for this category + ObjCCategoryDecl *CDeclChain; + for (CDeclChain = IDecl->getCategoryList(); CDeclChain; + CDeclChain = CDeclChain->getNextClassCategory()) { + if (CDeclChain->getIdentifier() == CategoryName) { + Diag(CategoryLoc, diag::err_dup_category_def, ClassName->getName(), + CategoryName->getName()); + break; + } + } + if (!CDeclChain) + CDecl->insertNextClassCategory(); + } + + if (NumProtoRefs) { + /// Check then save referenced protocols + for (unsigned int i = 0; i != NumProtoRefs; i++) { + ObjCProtocolDecl* RefPDecl = ObjCProtocols[ProtoRefNames[i]]; + if (!RefPDecl || RefPDecl->isForwardDecl()) { + Diag(CategoryLoc, diag::warn_undef_protocolref, + ProtoRefNames[i]->getName(), + CategoryName->getName()); + } + CDecl->setCatReferencedProtocols(i, RefPDecl); + } + CDecl->setLocEnd(EndProtoLoc); + } + return CDecl; +} + +/// ActOnStartCategoryImplementation - Perform semantic checks on the +/// category implementation declaration and build an ObjCCategoryImplDecl +/// object. +Sema::DeclTy *Sema::ActOnStartCategoryImplementation( + SourceLocation AtCatImplLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *CatName, SourceLocation CatLoc) { + ObjCInterfaceDecl *IDecl = getObjCInterfaceDecl(ClassName); + ObjCCategoryImplDecl *CDecl = new ObjCCategoryImplDecl(AtCatImplLoc, + CatName, IDecl); + /// Check that class of this category is already completely declared. + if (!IDecl || IDecl->isForwardDecl()) + Diag(ClassLoc, diag::err_undef_interface, ClassName->getName()); + + /// TODO: Check that CatName, category name, is not used in another + // implementation. 
+ return CDecl; +} + +Sema::DeclTy *Sema::ActOnStartClassImplementation( + SourceLocation AtClassImplLoc, + IdentifierInfo *ClassName, SourceLocation ClassLoc, + IdentifierInfo *SuperClassname, + SourceLocation SuperClassLoc) { + ObjCInterfaceDecl* IDecl = 0; + // Check for another declaration kind with the same name. + ScopedDecl *PrevDecl = LookupInterfaceDecl(ClassName); + if (PrevDecl && !isa<ObjCInterfaceDecl>(PrevDecl)) { + Diag(ClassLoc, diag::err_redefinition_different_kind, + ClassName->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + } + else { + // Is there an interface declaration of this class; if not, warn! + IDecl = dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl); + if (!IDecl) + Diag(ClassLoc, diag::warn_undef_interface, ClassName->getName()); + } + + // Check that super class name is valid class name + ObjCInterfaceDecl* SDecl = 0; + if (SuperClassname) { + // Check if a different kind of symbol declared in this scope. + PrevDecl = LookupInterfaceDecl(SuperClassname); + if (PrevDecl && !isa<ObjCInterfaceDecl>(PrevDecl)) { + Diag(SuperClassLoc, diag::err_redefinition_different_kind, + SuperClassname->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + } + else { + SDecl = dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl); + if (!SDecl) + Diag(SuperClassLoc, diag::err_undef_superclass, + SuperClassname->getName(), ClassName->getName()); + else if (IDecl && IDecl->getSuperClass() != SDecl) { + // This implementation and its interface do not have the same + // super class. + Diag(SuperClassLoc, diag::err_conflicting_super_class, + SDecl->getName()); + Diag(SDecl->getLocation(), diag::err_previous_definition); + } + } + } + + if (!IDecl) { + // Legacy case of @implementation with no corresponding @interface. + // Build, chain & install the interface decl into the identifier. 
+ IDecl = new ObjCInterfaceDecl(AtClassImplLoc, 0, ClassName, + false, true); + IDecl->setNext(ClassName->getFETokenInfo<ScopedDecl>()); + ClassName->setFETokenInfo(IDecl); + IDecl->setSuperClass(SDecl); + IDecl->setLocEnd(ClassLoc); + + // Remember that this needs to be removed when the scope is popped. + TUScope->AddDecl(IDecl); + } + + ObjCImplementationDecl* IMPDecl = + new ObjCImplementationDecl(AtClassImplLoc, ClassName, IDecl, SDecl); + + // Check that there is no duplicate implementation of this class. + if (ObjCImplementations[ClassName]) + Diag(ClassLoc, diag::err_dup_implementation_class, ClassName->getName()); + else // add it to the list. + ObjCImplementations[ClassName] = IMPDecl; + return IMPDecl; +} + +void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, + ObjCIvarDecl **ivars, unsigned numIvars, + SourceLocation RBrace) { + assert(ImpDecl && "missing implementation decl"); + ObjCInterfaceDecl* IDecl = getObjCInterfaceDecl(ImpDecl->getIdentifier()); + if (!IDecl) + return; + /// Check case of non-existing @interface decl. + /// (legacy objective-c @implementation decl without an @interface decl). + /// Add implementations's ivar to the synthesize class's ivar list. + if (IDecl->ImplicitInterfaceDecl()) { + IDecl->addInstanceVariablesToClass(ivars, numIvars, RBrace); + return; + } + // If implementation has empty ivar list, just return. + if (numIvars == 0) + return; + + assert(ivars && "missing @implementation ivars"); + + // Check interface's Ivar list against those in the implementation. + // names and types must match. 
+ // + unsigned j = 0; + ObjCInterfaceDecl::ivar_iterator + IVI = IDecl->ivar_begin(), IVE = IDecl->ivar_end(); + for (; numIvars > 0 && IVI != IVE; ++IVI) { + ObjCIvarDecl* ImplIvar = ivars[j++]; + ObjCIvarDecl* ClsIvar = *IVI; + assert (ImplIvar && "missing implementation ivar"); + assert (ClsIvar && "missing class ivar"); + if (ImplIvar->getCanonicalType() != ClsIvar->getCanonicalType()) { + Diag(ImplIvar->getLocation(), diag::err_conflicting_ivar_type, + ImplIvar->getIdentifier()->getName()); + Diag(ClsIvar->getLocation(), diag::err_previous_definition, + ClsIvar->getIdentifier()->getName()); + } + // TODO: Two mismatched (unequal width) Ivar bitfields should be diagnosed + // as error. + else if (ImplIvar->getIdentifier() != ClsIvar->getIdentifier()) { + Diag(ImplIvar->getLocation(), diag::err_conflicting_ivar_name, + ImplIvar->getIdentifier()->getName()); + Diag(ClsIvar->getLocation(), diag::err_previous_definition, + ClsIvar->getIdentifier()->getName()); + return; + } + --numIvars; + } + + if (numIvars > 0) + Diag(ivars[j]->getLocation(), diag::err_inconsistant_ivar_count); + else if (IVI != IVE) + Diag((*IVI)->getLocation(), diag::err_inconsistant_ivar_count); +} + +void Sema::WarnUndefinedMethod(SourceLocation ImpLoc, ObjCMethodDecl *method, + bool &IncompleteImpl) { + if (!IncompleteImpl) { + Diag(ImpLoc, diag::warn_incomplete_impl); + IncompleteImpl = true; + } + Diag(ImpLoc, diag::warn_undef_method_impl, method->getSelector().getName()); +} + +/// CheckProtocolMethodDefs - This routine checks unimplemented methods +/// Declared in protocol, and those referenced by it. +void Sema::CheckProtocolMethodDefs(SourceLocation ImpLoc, + ObjCProtocolDecl *PDecl, + bool& IncompleteImpl, + const llvm::DenseSet<Selector> &InsMap, + const llvm::DenseSet<Selector> &ClsMap) { + // check unimplemented instance methods. 
+ for (ObjCProtocolDecl::instmeth_iterator I = PDecl->instmeth_begin(), + E = PDecl->instmeth_end(); I != E; ++I) { + ObjCMethodDecl *method = *I; + if (!InsMap.count(method->getSelector()) && + method->getImplementationControl() != ObjCMethodDecl::Optional) + WarnUndefinedMethod(ImpLoc, method, IncompleteImpl); + } + // check unimplemented class methods + for (ObjCProtocolDecl::classmeth_iterator I = PDecl->classmeth_begin(), + E = PDecl->classmeth_end(); I != E; ++I) { + ObjCMethodDecl *method = *I; + if (!ClsMap.count(method->getSelector()) && + method->getImplementationControl() != ObjCMethodDecl::Optional) + WarnUndefinedMethod(ImpLoc, method, IncompleteImpl); + } + // Check on this protocols's referenced protocols, recursively + ObjCProtocolDecl** RefPDecl = PDecl->getReferencedProtocols(); + for (unsigned i = 0; i < PDecl->getNumReferencedProtocols(); i++) + CheckProtocolMethodDefs(ImpLoc, RefPDecl[i], IncompleteImpl, InsMap, ClsMap); +} + +void Sema::ImplMethodsVsClassMethods(ObjCImplementationDecl* IMPDecl, + ObjCInterfaceDecl* IDecl) { + llvm::DenseSet<Selector> InsMap; + // Check and see if instance methods in class interface have been + // implemented in the implementation class. + for (ObjCImplementationDecl::instmeth_iterator I = IMPDecl->instmeth_begin(), + E = IMPDecl->instmeth_end(); I != E; ++I) + InsMap.insert((*I)->getSelector()); + + bool IncompleteImpl = false; + for (ObjCInterfaceDecl::instmeth_iterator I = IDecl->instmeth_begin(), + E = IDecl->instmeth_end(); I != E; ++I) + if (!InsMap.count((*I)->getSelector())) + WarnUndefinedMethod(IMPDecl->getLocation(), *I, IncompleteImpl); + + llvm::DenseSet<Selector> ClsMap; + // Check and see if class methods in class interface have been + // implemented in the implementation class. 
+ for (ObjCImplementationDecl::classmeth_iterator I =IMPDecl->classmeth_begin(), + E = IMPDecl->classmeth_end(); I != E; ++I) + ClsMap.insert((*I)->getSelector()); + + for (ObjCInterfaceDecl::classmeth_iterator I = IDecl->classmeth_begin(), + E = IDecl->classmeth_end(); I != E; ++I) + if (!ClsMap.count((*I)->getSelector())) + WarnUndefinedMethod(IMPDecl->getLocation(), *I, IncompleteImpl); + + // Check the protocol list for unimplemented methods in the @implementation + // class. + ObjCProtocolDecl** protocols = IDecl->getReferencedProtocols(); + for (unsigned i = 0; i < IDecl->getNumIntfRefProtocols(); i++) + CheckProtocolMethodDefs(IMPDecl->getLocation(), protocols[i], + IncompleteImpl, InsMap, ClsMap); +} + +/// ImplCategoryMethodsVsIntfMethods - Checks that methods declared in the +/// category interface is implemented in the category @implementation. +void Sema::ImplCategoryMethodsVsIntfMethods(ObjCCategoryImplDecl *CatImplDecl, + ObjCCategoryDecl *CatClassDecl) { + llvm::DenseSet<Selector> InsMap; + // Check and see if instance methods in category interface have been + // implemented in its implementation class. + for (ObjCCategoryImplDecl::instmeth_iterator I =CatImplDecl->instmeth_begin(), + E = CatImplDecl->instmeth_end(); I != E; ++I) + InsMap.insert((*I)->getSelector()); + + bool IncompleteImpl = false; + for (ObjCCategoryDecl::instmeth_iterator I = CatClassDecl->instmeth_begin(), + E = CatClassDecl->instmeth_end(); I != E; ++I) + if (!InsMap.count((*I)->getSelector())) + WarnUndefinedMethod(CatImplDecl->getLocation(), *I, IncompleteImpl); + + llvm::DenseSet<Selector> ClsMap; + // Check and see if class methods in category interface have been + // implemented in its implementation class. 
+ for (ObjCCategoryImplDecl::classmeth_iterator + I = CatImplDecl->classmeth_begin(), E = CatImplDecl->classmeth_end(); + I != E; ++I) + ClsMap.insert((*I)->getSelector()); + + for (ObjCCategoryDecl::classmeth_iterator I = CatClassDecl->classmeth_begin(), + E = CatClassDecl->classmeth_end(); I != E; ++I) + if (!ClsMap.count((*I)->getSelector())) + WarnUndefinedMethod(CatImplDecl->getLocation(), *I, IncompleteImpl); + + // Check the protocol list for unimplemented methods in the @implementation + // class. + ObjCProtocolDecl** protocols = CatClassDecl->getReferencedProtocols(); + for (unsigned i = 0; i < CatClassDecl->getNumReferencedProtocols(); i++) { + ObjCProtocolDecl* PDecl = protocols[i]; + CheckProtocolMethodDefs(CatImplDecl->getLocation(), PDecl, IncompleteImpl, + InsMap, ClsMap); + } +} + +/// ActOnForwardClassDeclaration - +Action::DeclTy * +Sema::ActOnForwardClassDeclaration(SourceLocation AtClassLoc, + IdentifierInfo **IdentList, unsigned NumElts) +{ + llvm::SmallVector<ObjCInterfaceDecl*, 32> Interfaces; + + for (unsigned i = 0; i != NumElts; ++i) { + // Check for another declaration kind with the same name. + ScopedDecl *PrevDecl = LookupInterfaceDecl(IdentList[i]); + if (PrevDecl && !isa<ObjCInterfaceDecl>(PrevDecl)) { + Diag(AtClassLoc, diag::err_redefinition_different_kind, + IdentList[i]->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + } + ObjCInterfaceDecl *IDecl = dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl); + if (!IDecl) { // Not already seen? Make a forward decl. + IDecl = new ObjCInterfaceDecl(AtClassLoc, 0, IdentList[i], true); + // Chain & install the interface decl into the identifier. + IDecl->setNext(IdentList[i]->getFETokenInfo<ScopedDecl>()); + IdentList[i]->setFETokenInfo(IDecl); + + // Remember that this needs to be removed when the scope is popped. 
+ TUScope->AddDecl(IDecl); + } + + Interfaces.push_back(IDecl); + } + + return new ObjCClassDecl(AtClassLoc, &Interfaces[0], Interfaces.size()); +} + + +/// MatchTwoMethodDeclarations - Checks that two methods have matching type and +/// returns true, or false, accordingly. +/// TODO: Handle protocol list; such as id<p1,p2> in type comparisons +bool Sema::MatchTwoMethodDeclarations(const ObjCMethodDecl *Method, + const ObjCMethodDecl *PrevMethod) { + if (Method->getResultType().getCanonicalType() != + PrevMethod->getResultType().getCanonicalType()) + return false; + for (int i = 0; i < Method->getNumParams(); i++) { + ParmVarDecl *ParamDecl = Method->getParamDecl(i); + ParmVarDecl *PrevParamDecl = PrevMethod->getParamDecl(i); + if (ParamDecl->getCanonicalType() != PrevParamDecl->getCanonicalType()) + return false; + } + return true; +} + +void Sema::AddInstanceMethodToGlobalPool(ObjCMethodDecl *Method) { + ObjCMethodList &FirstMethod = InstanceMethodPool[Method->getSelector()]; + if (!FirstMethod.Method) { + // Haven't seen a method with this selector name yet - add it. + FirstMethod.Method = Method; + FirstMethod.Next = 0; + } else { + // We've seen a method with this name, now check the type signature(s). + bool match = MatchTwoMethodDeclarations(Method, FirstMethod.Method); + + for (ObjCMethodList *Next = FirstMethod.Next; !match && Next; + Next = Next->Next) + match = MatchTwoMethodDeclarations(Method, Next->Method); + + if (!match) { + // We have a new signature for an existing method - add it. + // This is extremely rare. Only 1% of Cocoa selectors are "overloaded". + struct ObjCMethodList *OMI = new ObjCMethodList(Method, FirstMethod.Next); + FirstMethod.Next = OMI; + } + } +} + +void Sema::AddFactoryMethodToGlobalPool(ObjCMethodDecl *Method) { + ObjCMethodList &FirstMethod = FactoryMethodPool[Method->getSelector()]; + if (!FirstMethod.Method) { + // Haven't seen a method with this selector name yet - add it. 
+ FirstMethod.Method = Method; + FirstMethod.Next = 0; + } else { + // We've seen a method with this name, now check the type signature(s). + bool match = MatchTwoMethodDeclarations(Method, FirstMethod.Method); + + for (ObjCMethodList *Next = FirstMethod.Next; !match && Next; + Next = Next->Next) + match = MatchTwoMethodDeclarations(Method, Next->Method); + + if (!match) { + // We have a new signature for an existing method - add it. + // This is extremely rare. Only 1% of Cocoa selectors are "overloaded". + struct ObjCMethodList *OMI = new ObjCMethodList(Method, FirstMethod.Next); + FirstMethod.Next = OMI; + } + } +} + +// Note: For class/category implemenations, allMethods/allProperties is +// always null. +void Sema::ActOnAtEnd(SourceLocation AtEndLoc, DeclTy *classDecl, + DeclTy **allMethods, unsigned allNum, + DeclTy **allProperties, unsigned pNum) { + Decl *ClassDecl = static_cast<Decl *>(classDecl); + + // FIXME: If we don't have a ClassDecl, we have an error. We should consider + // always passing in a decl. If the decl has an error, isInvalidDecl() + // should be true. + if (!ClassDecl) + return; + + llvm::SmallVector<ObjCMethodDecl*, 32> insMethods; + llvm::SmallVector<ObjCMethodDecl*, 16> clsMethods; + + llvm::DenseMap<Selector, const ObjCMethodDecl*> InsMap; + llvm::DenseMap<Selector, const ObjCMethodDecl*> ClsMap; + + bool isInterfaceDeclKind = + (isa<ObjCInterfaceDecl>(ClassDecl) || isa<ObjCCategoryDecl>(ClassDecl) + || isa<ObjCProtocolDecl>(ClassDecl)); + bool checkIdenticalMethods = isa<ObjCImplementationDecl>(ClassDecl); + + // TODO: property declaration in category and protocols. 
+ if (pNum != 0 && isa<ObjCInterfaceDecl>(ClassDecl)) { + ObjCPropertyDecl **properties = new ObjCPropertyDecl*[pNum]; + memcpy(properties, allProperties, pNum*sizeof(ObjCPropertyDecl*)); + dyn_cast<ObjCInterfaceDecl>(ClassDecl)->setPropertyDecls(properties); + dyn_cast<ObjCInterfaceDecl>(ClassDecl)->setNumPropertyDecl(pNum); + } + + for (unsigned i = 0; i < allNum; i++ ) { + ObjCMethodDecl *Method = + cast_or_null<ObjCMethodDecl>(static_cast<Decl*>(allMethods[i])); + + if (!Method) continue; // Already issued a diagnostic. + if (Method->isInstance()) { + /// Check for instance method of the same name with incompatible types + const ObjCMethodDecl *&PrevMethod = InsMap[Method->getSelector()]; + bool match = PrevMethod ? MatchTwoMethodDeclarations(Method, PrevMethod) + : false; + if (isInterfaceDeclKind && PrevMethod && !match + || checkIdenticalMethods && match) { + Diag(Method->getLocation(), diag::error_duplicate_method_decl, + Method->getSelector().getName()); + Diag(PrevMethod->getLocation(), diag::err_previous_declaration); + } else { + insMethods.push_back(Method); + InsMap[Method->getSelector()] = Method; + /// The following allows us to typecheck messages to "id". + AddInstanceMethodToGlobalPool(Method); + } + } + else { + /// Check for class method of the same name with incompatible types + const ObjCMethodDecl *&PrevMethod = ClsMap[Method->getSelector()]; + bool match = PrevMethod ? MatchTwoMethodDeclarations(Method, PrevMethod) + : false; + if (isInterfaceDeclKind && PrevMethod && !match + || checkIdenticalMethods && match) { + Diag(Method->getLocation(), diag::error_duplicate_method_decl, + Method->getSelector().getName()); + Diag(PrevMethod->getLocation(), diag::err_previous_declaration); + } else { + clsMethods.push_back(Method); + ClsMap[Method->getSelector()] = Method; + /// The following allows us to typecheck messages to "Class". 
+ AddFactoryMethodToGlobalPool(Method); + } + } + } + + if (ObjCInterfaceDecl *I = dyn_cast<ObjCInterfaceDecl>(ClassDecl)) { + I->addMethods(&insMethods[0], insMethods.size(), + &clsMethods[0], clsMethods.size(), AtEndLoc); + } else if (ObjCProtocolDecl *P = dyn_cast<ObjCProtocolDecl>(ClassDecl)) { + P->addMethods(&insMethods[0], insMethods.size(), + &clsMethods[0], clsMethods.size(), AtEndLoc); + } + else if (ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(ClassDecl)) { + C->addMethods(&insMethods[0], insMethods.size(), + &clsMethods[0], clsMethods.size(), AtEndLoc); + } + else if (ObjCImplementationDecl *IC = + dyn_cast<ObjCImplementationDecl>(ClassDecl)) { + IC->setLocEnd(AtEndLoc); + if (ObjCInterfaceDecl* IDecl = getObjCInterfaceDecl(IC->getIdentifier())) + ImplMethodsVsClassMethods(IC, IDecl); + } else { + ObjCCategoryImplDecl* CatImplClass = cast<ObjCCategoryImplDecl>(ClassDecl); + CatImplClass->setLocEnd(AtEndLoc); + ObjCInterfaceDecl* IDecl = CatImplClass->getClassInterface(); + // Find category interface decl and then check that all methods declared + // in this interface is implemented in the category @implementation. + if (IDecl) { + for (ObjCCategoryDecl *Categories = IDecl->getCategoryList(); + Categories; Categories = Categories->getNextClassCategory()) { + if (Categories->getIdentifier() == CatImplClass->getIdentifier()) { + ImplCategoryMethodsVsIntfMethods(CatImplClass, Categories); + break; + } + } + } + } +} + + +/// CvtQTToAstBitMask - utility routine to produce an AST bitmask for +/// objective-c's type qualifier from the parser version of the same info. 
+static Decl::ObjCDeclQualifier +CvtQTToAstBitMask(ObjCDeclSpec::ObjCDeclQualifier PQTVal) { + Decl::ObjCDeclQualifier ret = Decl::OBJC_TQ_None; + if (PQTVal & ObjCDeclSpec::DQ_In) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_In); + if (PQTVal & ObjCDeclSpec::DQ_Inout) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_Inout); + if (PQTVal & ObjCDeclSpec::DQ_Out) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_Out); + if (PQTVal & ObjCDeclSpec::DQ_Bycopy) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_Bycopy); + if (PQTVal & ObjCDeclSpec::DQ_Byref) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_Byref); + if (PQTVal & ObjCDeclSpec::DQ_Oneway) + ret = (Decl::ObjCDeclQualifier)(ret | Decl::OBJC_TQ_Oneway); + + return ret; +} + +Sema::DeclTy *Sema::ActOnMethodDeclaration( + SourceLocation MethodLoc, SourceLocation EndLoc, + tok::TokenKind MethodType, DeclTy *ClassDecl, + ObjCDeclSpec &ReturnQT, TypeTy *ReturnType, + Selector Sel, + // optional arguments. The number of types/arguments is obtained + // from the Sel.getNumArgs(). + ObjCDeclSpec *ArgQT, TypeTy **ArgTypes, IdentifierInfo **ArgNames, + AttributeList *AttrList, tok::ObjCKeywordKind MethodDeclKind, + bool isVariadic) { + + // Make sure we can establish a context for the method. + if (!ClassDecl) { + Diag(MethodLoc, diag::error_missing_method_context); + return 0; + } + llvm::SmallVector<ParmVarDecl*, 16> Params; + + for (unsigned i = 0; i < Sel.getNumArgs(); i++) { + // FIXME: arg->AttrList must be stored too! 
+ QualType argType; + + if (ArgTypes[i]) + argType = QualType::getFromOpaquePtr(ArgTypes[i]); + else + argType = Context.getObjCIdType(); + ParmVarDecl* Param = ParmVarDecl::Create(Context, SourceLocation(/*FIXME*/), + ArgNames[i], argType, + VarDecl::None, 0); + Param->setObjCDeclQualifier( + CvtQTToAstBitMask(ArgQT[i].getObjCDeclQualifier())); + Params.push_back(Param); + } + QualType resultDeclType; + + if (ReturnType) + resultDeclType = QualType::getFromOpaquePtr(ReturnType); + else // get the type for "id". + resultDeclType = Context.getObjCIdType(); + + Decl *CDecl = static_cast<Decl*>(ClassDecl); + ObjCMethodDecl* ObjCMethod = new ObjCMethodDecl(MethodLoc, EndLoc, Sel, + resultDeclType, + CDecl, + 0, -1, AttrList, + MethodType == tok::minus, isVariadic, + MethodDeclKind == tok::objc_optional ? + ObjCMethodDecl::Optional : + ObjCMethodDecl::Required); + ObjCMethod->setMethodParams(&Params[0], Sel.getNumArgs()); + ObjCMethod->setObjCDeclQualifier( + CvtQTToAstBitMask(ReturnQT.getObjCDeclQualifier())); + const ObjCMethodDecl *PrevMethod = 0; + + // For implementations (which can be very "coarse grain"), we add the + // method now. This allows the AST to implement lookup methods that work + // incrementally (without waiting until we parse the @end). It also allows + // us to flag multiple declaration errors as they occur. 
+ if (ObjCImplementationDecl *ImpDecl = + dyn_cast<ObjCImplementationDecl>(CDecl)) { + if (MethodType == tok::minus) { + PrevMethod = ImpDecl->getInstanceMethod(Sel); + ImpDecl->addInstanceMethod(ObjCMethod); + } else { + PrevMethod = ImpDecl->getClassMethod(Sel); + ImpDecl->addClassMethod(ObjCMethod); + } + } + else if (ObjCCategoryImplDecl *CatImpDecl = + dyn_cast<ObjCCategoryImplDecl>(CDecl)) { + if (MethodType == tok::minus) { + PrevMethod = CatImpDecl->getInstanceMethod(Sel); + CatImpDecl->addInstanceMethod(ObjCMethod); + } else { + PrevMethod = CatImpDecl->getClassMethod(Sel); + CatImpDecl->addClassMethod(ObjCMethod); + } + } + if (PrevMethod) { + // You can never have two method definitions with the same name. + Diag(ObjCMethod->getLocation(), diag::error_duplicate_method_decl, + ObjCMethod->getSelector().getName()); + Diag(PrevMethod->getLocation(), diag::err_previous_declaration); + } + return ObjCMethod; +} + +Sema::DeclTy *Sema::ActOnAddObjCProperties(SourceLocation AtLoc, + DeclTy **allProperties, unsigned NumProperties, ObjCDeclSpec &DS) { + ObjCPropertyDecl *PDecl = new ObjCPropertyDecl(AtLoc); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_readonly) + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readonly); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_getter) { + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_getter); + PDecl->setGetterName(DS.getGetterName()); + } + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_setter) { + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_setter); + PDecl->setSetterName(DS.getSetterName()); + } + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_assign) + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_assign); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_readwrite) + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readwrite); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_retain) + 
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_retain); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_copy) + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_copy); + + if(DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nonatomic) + PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nonatomic); + + PDecl->setNumPropertyDecls(NumProperties); + if (NumProperties != 0) { + ObjCIvarDecl **properties = new ObjCIvarDecl*[NumProperties]; + memcpy(properties, allProperties, NumProperties*sizeof(ObjCIvarDecl*)); + PDecl->setPropertyDecls(properties); + } + return PDecl; +} + diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp new file mode 100644 index 00000000000..d756345c1a8 --- /dev/null +++ b/clang/lib/Sema/SemaExpr.cpp @@ -0,0 +1,2286 @@ +//===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for expressions. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "SemaUtil.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +/// ActOnStringLiteral - The specified tokens were lexed as pasted string +/// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string +/// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from +/// multiple tokens. 
However, the common case is that StringToks points to one +/// string. +/// +Action::ExprResult +Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) { + assert(NumStringToks && "Must have at least one string!"); + + StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target); + if (Literal.hadError) + return ExprResult(true); + + llvm::SmallVector<SourceLocation, 4> StringTokLocs; + for (unsigned i = 0; i != NumStringToks; ++i) + StringTokLocs.push_back(StringToks[i].getLocation()); + + // Verify that pascal strings aren't too large. + if (Literal.Pascal && Literal.GetStringLength() > 256) + return Diag(StringToks[0].getLocation(), diag::err_pascal_string_too_long, + SourceRange(StringToks[0].getLocation(), + StringToks[NumStringToks-1].getLocation())); + + QualType StrTy = Context.CharTy; + // FIXME: handle wchar_t + if (Literal.Pascal) StrTy = Context.UnsignedCharTy; + + // Get an array type for the string, according to C99 6.4.5. This includes + // the nul terminator character as well as the string length for pascal + // strings. + StrTy = Context.getConstantArrayType(StrTy, + llvm::APInt(32, Literal.GetStringLength()+1), + ArrayType::Normal, 0); + + // Pass &StringTokLocs[0], StringTokLocs.size() to factory! + return new StringLiteral(Literal.GetString(), Literal.GetStringLength(), + Literal.AnyWide, StrTy, + StringToks[0].getLocation(), + StringToks[NumStringToks-1].getLocation()); +} + + +/// ActOnIdentifierExpr - The parser read an identifier in expression context, +/// validate it per-C99 6.5.1. HasTrailingLParen indicates whether this +/// identifier is used in an function call context. +Sema::ExprResult Sema::ActOnIdentifierExpr(Scope *S, SourceLocation Loc, + IdentifierInfo &II, + bool HasTrailingLParen) { + // Could be enum-constant or decl. 
+ ScopedDecl *D = LookupScopedDecl(&II, Decl::IDNS_Ordinary, Loc, S); + if (D == 0) { + // Otherwise, this could be an implicitly declared function reference (legal + // in C90, extension in C99). + if (HasTrailingLParen && + // Not in C++. + !getLangOptions().CPlusPlus) + D = ImplicitlyDefineFunction(Loc, II, S); + else { + if (CurMethodDecl) { + ObjCInterfaceDecl *IFace = CurMethodDecl->getClassInterface(); + ObjCInterfaceDecl *clsDeclared; + if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(&II, clsDeclared)) { + IdentifierInfo &II = Context.Idents.get("self"); + ExprResult SelfExpr = ActOnIdentifierExpr(S, Loc, II, false); + return new ObjCIvarRefExpr(IV, IV->getType(), Loc, + static_cast<Expr*>(SelfExpr.Val), true, true); + } + } + // If this name wasn't predeclared and if this is not a function call, + // diagnose the problem. + return Diag(Loc, diag::err_undeclared_var_use, II.getName()); + } + } + if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) { + // check if referencing an identifier with __attribute__((deprecated)). + if (VD->getAttr<DeprecatedAttr>()) + Diag(Loc, diag::warn_deprecated, VD->getName()); + + // Only create DeclRefExpr's for valid Decl's. + if (VD->isInvalidDecl()) + return true; + return new DeclRefExpr(VD, VD->getType(), Loc); + } + if (isa<TypedefDecl>(D)) + return Diag(Loc, diag::err_unexpected_typedef, II.getName()); + if (isa<ObjCInterfaceDecl>(D)) + return Diag(Loc, diag::err_unexpected_interface, II.getName()); + + assert(0 && "Invalid decl"); + abort(); +} + +Sema::ExprResult Sema::ActOnPreDefinedExpr(SourceLocation Loc, + tok::TokenKind Kind) { + PreDefinedExpr::IdentType IT; + + switch (Kind) { + default: assert(0 && "Unknown simple primary expr!"); + case tok::kw___func__: IT = PreDefinedExpr::Func; break; // [C99 6.4.2.2] + case tok::kw___FUNCTION__: IT = PreDefinedExpr::Function; break; + case tok::kw___PRETTY_FUNCTION__: IT = PreDefinedExpr::PrettyFunction; break; + } + + // Verify that this is in a function context. 
+ if (CurFunctionDecl == 0 && CurMethodDecl == 0) + return Diag(Loc, diag::err_predef_outside_function); + + // Pre-defined identifiers are of type char[x], where x is the length of the + // string. + unsigned Length; + if (CurFunctionDecl) + Length = CurFunctionDecl->getIdentifier()->getLength(); + else + Length = CurMethodDecl->getSynthesizedMethodSize(); + + llvm::APInt LengthI(32, Length + 1); + QualType ResTy = Context.CharTy.getQualifiedType(QualType::Const); + ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, 0); + return new PreDefinedExpr(Loc, ResTy, IT); +} + +Sema::ExprResult Sema::ActOnCharacterConstant(const Token &Tok) { + llvm::SmallString<16> CharBuffer; + CharBuffer.resize(Tok.getLength()); + const char *ThisTokBegin = &CharBuffer[0]; + unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin); + + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + Tok.getLocation(), PP); + if (Literal.hadError()) + return ExprResult(true); + + QualType type = getLangOptions().CPlusPlus ? Context.CharTy : Context.IntTy; + + return new CharacterLiteral(Literal.getValue(), type, Tok.getLocation()); +} + +Action::ExprResult Sema::ActOnNumericConstant(const Token &Tok) { + // fast path for a single digit (which is quite common). A single digit + // cannot have a trigraph, escaped newline, radix prefix, or type suffix. + if (Tok.getLength() == 1) { + const char *t = PP.getSourceManager().getCharacterData(Tok.getLocation()); + + unsigned IntSize =static_cast<unsigned>(Context.getTypeSize(Context.IntTy)); + return ExprResult(new IntegerLiteral(llvm::APInt(IntSize, *t-'0'), + Context.IntTy, + Tok.getLocation())); + } + llvm::SmallString<512> IntegerBuffer; + IntegerBuffer.resize(Tok.getLength()); + const char *ThisTokBegin = &IntegerBuffer[0]; + + // Get the spelling of the token, which eliminates trigraphs, etc. 
+ unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin); + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + Tok.getLocation(), PP); + if (Literal.hadError) + return ExprResult(true); + + Expr *Res; + + if (Literal.isFloatingLiteral()) { + QualType Ty; + const llvm::fltSemantics *Format; + + if (Literal.isFloat) { + Ty = Context.FloatTy; + Format = Context.Target.getFloatFormat(); + } else if (!Literal.isLong) { + Ty = Context.DoubleTy; + Format = Context.Target.getDoubleFormat(); + } else { + Ty = Context.LongDoubleTy; + Format = Context.Target.getLongDoubleFormat(); + } + + // isExact will be set by GetFloatValue(). + bool isExact = false; + + Res = new FloatingLiteral(Literal.GetFloatValue(*Format,&isExact), &isExact, + Ty, Tok.getLocation()); + + } else if (!Literal.isIntegerLiteral()) { + return ExprResult(true); + } else { + QualType t; + + // long long is a C99 feature. + if (!getLangOptions().C99 && !getLangOptions().CPlusPlus0x && + Literal.isLongLong) + Diag(Tok.getLocation(), diag::ext_longlong); + + // Get the value in the widest-possible width. + llvm::APInt ResultVal(Context.Target.getIntMaxTWidth(), 0); + + if (Literal.GetIntegerValue(ResultVal)) { + // If this value didn't fit into uintmax_t, warn and force to ull. + Diag(Tok.getLocation(), diag::warn_integer_too_large); + t = Context.UnsignedLongLongTy; + assert(Context.getTypeSize(t) == ResultVal.getBitWidth() && + "long long is not intmax_t?"); + } else { + // If this value fits into a ULL, try to figure out what else it fits into + // according to the rules of C99 6.4.4.1p5. + + // Octal, Hexadecimal, and integers with a U suffix are allowed to + // be an unsigned int. + bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; + + // Check from smallest to largest, picking the smallest type we can. + if (!Literal.isLong && !Literal.isLongLong) { + // Are int/unsigned possibilities? 
+ unsigned IntSize = + static_cast<unsigned>(Context.getTypeSize(Context.IntTy)); + // Does it fit in a unsigned int? + if (ResultVal.isIntN(IntSize)) { + // Does it fit in a signed int? + if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) + t = Context.IntTy; + else if (AllowUnsigned) + t = Context.UnsignedIntTy; + } + + if (!t.isNull()) + ResultVal.trunc(IntSize); + } + + // Are long/unsigned long possibilities? + if (t.isNull() && !Literal.isLongLong) { + unsigned LongSize = + static_cast<unsigned>(Context.getTypeSize(Context.LongTy)); + + // Does it fit in a unsigned long? + if (ResultVal.isIntN(LongSize)) { + // Does it fit in a signed long? + if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) + t = Context.LongTy; + else if (AllowUnsigned) + t = Context.UnsignedLongTy; + } + if (!t.isNull()) + ResultVal.trunc(LongSize); + } + + // Finally, check long long if needed. + if (t.isNull()) { + unsigned LongLongSize = + static_cast<unsigned>(Context.getTypeSize(Context.LongLongTy)); + + // Does it fit in a unsigned long long? + if (ResultVal.isIntN(LongLongSize)) { + // Does it fit in a signed long long? + if (!Literal.isUnsigned && ResultVal[LongLongSize-1] == 0) + t = Context.LongLongTy; + else if (AllowUnsigned) + t = Context.UnsignedLongLongTy; + } + } + + // If we still couldn't decide a type, we probably have something that + // does not fit in a signed long long, but has no U suffix. + if (t.isNull()) { + Diag(Tok.getLocation(), diag::warn_integer_too_large_for_signed); + t = Context.UnsignedLongLongTy; + } + } + + Res = new IntegerLiteral(ResultVal, t, Tok.getLocation()); + } + + // If this is an imaginary literal, create the ImaginaryLiteral wrapper. 
+ if (Literal.isImaginary) + Res = new ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); + + return Res; +} + +Action::ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, + ExprTy *Val) { + Expr *e = (Expr *)Val; + assert((e != 0) && "ActOnParenExpr() missing expr"); + return new ParenExpr(L, R, e); +} + +/// The UsualUnaryConversions() function is *not* called by this routine. +/// See C99 6.3.2.1p[2-4] for more details. +QualType Sema::CheckSizeOfAlignOfOperand(QualType exprType, + SourceLocation OpLoc, bool isSizeof) { + // C99 6.5.3.4p1: + if (isa<FunctionType>(exprType) && isSizeof) + // alignof(function) is allowed. + Diag(OpLoc, diag::ext_sizeof_function_type); + else if (exprType->isVoidType()) + Diag(OpLoc, diag::ext_sizeof_void_type, isSizeof ? "sizeof" : "__alignof"); + else if (exprType->isIncompleteType()) { + Diag(OpLoc, isSizeof ? diag::err_sizeof_incomplete_type : + diag::err_alignof_incomplete_type, + exprType.getAsString()); + return QualType(); // error + } + // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. + return Context.getSizeType(); +} + +Action::ExprResult Sema:: +ActOnSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof, + SourceLocation LPLoc, TypeTy *Ty, + SourceLocation RPLoc) { + // If error parsing type, ignore. + if (Ty == 0) return true; + + // Verify that this is a valid expression. + QualType ArgTy = QualType::getFromOpaquePtr(Ty); + + QualType resultType = CheckSizeOfAlignOfOperand(ArgTy, OpLoc, isSizeof); + + if (resultType.isNull()) + return true; + return new SizeOfAlignOfTypeExpr(isSizeof, ArgTy, resultType, OpLoc, RPLoc); +} + +QualType Sema::CheckRealImagOperand(Expr *&V, SourceLocation Loc) { + DefaultFunctionArrayConversion(V); + + // These operators return the element type of a complex type. + if (const ComplexType *CT = V->getType()->getAsComplexType()) + return CT->getElementType(); + + // Otherwise they pass through real integer and floating point types here. 
+ if (V->getType()->isArithmeticType()) + return V->getType(); + + // Reject anything else. + Diag(Loc, diag::err_realimag_invalid_type, V->getType().getAsString()); + return QualType(); +} + + + +Action::ExprResult Sema::ActOnPostfixUnaryOp(SourceLocation OpLoc, + tok::TokenKind Kind, + ExprTy *Input) { + UnaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown unary op!"); + case tok::plusplus: Opc = UnaryOperator::PostInc; break; + case tok::minusminus: Opc = UnaryOperator::PostDec; break; + } + QualType result = CheckIncrementDecrementOperand((Expr *)Input, OpLoc); + if (result.isNull()) + return true; + return new UnaryOperator((Expr *)Input, Opc, result, OpLoc); +} + +Action::ExprResult Sema:: +ActOnArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc, + ExprTy *Idx, SourceLocation RLoc) { + Expr *LHSExp = static_cast<Expr*>(Base), *RHSExp = static_cast<Expr*>(Idx); + + // Perform default conversions. + DefaultFunctionArrayConversion(LHSExp); + DefaultFunctionArrayConversion(RHSExp); + + QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); + + // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent + // to the expression *((e1)+(e2)). This means the array "Base" may actually be + // in the subscript position. As a result, we need to derive the array base + // and index from the expression types. + Expr *BaseExpr, *IndexExpr; + QualType ResultType; + if (const PointerType *PTy = LHSTy->getAsPointerType()) { + BaseExpr = LHSExp; + IndexExpr = RHSExp; + // FIXME: need to deal with const... + ResultType = PTy->getPointeeType(); + } else if (const PointerType *PTy = RHSTy->getAsPointerType()) { + // Handle the uncommon case of "123[Ptr]". + BaseExpr = RHSExp; + IndexExpr = LHSExp; + // FIXME: need to deal with const... 
+ ResultType = PTy->getPointeeType(); + } else if (const VectorType *VTy = LHSTy->getAsVectorType()) { + BaseExpr = LHSExp; // vectors: V[123] + IndexExpr = RHSExp; + + // Component access limited to variables (reject vec4.rg[1]). + if (!isa<DeclRefExpr>(BaseExpr) && !isa<ArraySubscriptExpr>(BaseExpr)) + return Diag(LLoc, diag::err_ocuvector_component_access, + SourceRange(LLoc, RLoc)); + // FIXME: need to deal with const... + ResultType = VTy->getElementType(); + } else { + return Diag(LHSExp->getLocStart(), diag::err_typecheck_subscript_value, + RHSExp->getSourceRange()); + } + // C99 6.5.2.1p1 + if (!IndexExpr->getType()->isIntegerType()) + return Diag(IndexExpr->getLocStart(), diag::err_typecheck_subscript, + IndexExpr->getSourceRange()); + + // C99 6.5.2.1p1: "shall have type "pointer to *object* type". In practice, + // the following check catches trying to index a pointer to a function (e.g. + // void (*)(int)). Functions are not objects in C99. + if (!ResultType->isObjectType()) + return Diag(BaseExpr->getLocStart(), + diag::err_typecheck_subscript_not_object, + BaseExpr->getType().getAsString(), BaseExpr->getSourceRange()); + + return new ArraySubscriptExpr(LHSExp, RHSExp, ResultType, RLoc); +} + +QualType Sema:: +CheckOCUVectorComponent(QualType baseType, SourceLocation OpLoc, + IdentifierInfo &CompName, SourceLocation CompLoc) { + const OCUVectorType *vecType = baseType->getAsOCUVectorType(); + + // The vector accessor can't exceed the number of elements. + const char *compStr = CompName.getName(); + if (strlen(compStr) > vecType->getNumElements()) { + Diag(OpLoc, diag::err_ocuvector_component_exceeds_length, + baseType.getAsString(), SourceRange(CompLoc)); + return QualType(); + } + // The component names must come from the same set. 
+ if (vecType->getPointAccessorIdx(*compStr) != -1) { + do + compStr++; + while (*compStr && vecType->getPointAccessorIdx(*compStr) != -1); + } else if (vecType->getColorAccessorIdx(*compStr) != -1) { + do + compStr++; + while (*compStr && vecType->getColorAccessorIdx(*compStr) != -1); + } else if (vecType->getTextureAccessorIdx(*compStr) != -1) { + do + compStr++; + while (*compStr && vecType->getTextureAccessorIdx(*compStr) != -1); + } + + if (*compStr) { + // We didn't get to the end of the string. This means the component names + // didn't come from the same set *or* we encountered an illegal name. + Diag(OpLoc, diag::err_ocuvector_component_name_illegal, + std::string(compStr,compStr+1), SourceRange(CompLoc)); + return QualType(); + } + // Each component accessor can't exceed the vector type. + compStr = CompName.getName(); + while (*compStr) { + if (vecType->isAccessorWithinNumElements(*compStr)) + compStr++; + else + break; + } + if (*compStr) { + // We didn't get to the end of the string. This means a component accessor + // exceeds the number of elements in the vector. + Diag(OpLoc, diag::err_ocuvector_component_exceeds_length, + baseType.getAsString(), SourceRange(CompLoc)); + return QualType(); + } + // The component accessor looks fine - now we need to compute the actual type. + // The vector type is implied by the component accessor. For example, + // vec4.b is a float, vec4.xy is a vec2, vec4.rgb is a vec3, etc. + unsigned CompSize = strlen(CompName.getName()); + if (CompSize == 1) + return vecType->getElementType(); + + QualType VT = Context.getOCUVectorType(vecType->getElementType(), CompSize); + // Now look up the TypeDefDecl from the vector type. Without this, + // diagostics look bad. We want OCU vector types to appear built-in. 
+ for (unsigned i = 0, e = OCUVectorDecls.size(); i != e; ++i) { + if (OCUVectorDecls[i]->getUnderlyingType() == VT) + return Context.getTypedefType(OCUVectorDecls[i]); + } + return VT; // should never get here (a typedef type should always be found). +} + +Action::ExprResult Sema:: +ActOnMemberReferenceExpr(ExprTy *Base, SourceLocation OpLoc, + tok::TokenKind OpKind, SourceLocation MemberLoc, + IdentifierInfo &Member) { + Expr *BaseExpr = static_cast<Expr *>(Base); + assert(BaseExpr && "no record expression"); + + // Perform default conversions. + DefaultFunctionArrayConversion(BaseExpr); + + QualType BaseType = BaseExpr->getType(); + assert(!BaseType.isNull() && "no type for member expression"); + + if (OpKind == tok::arrow) { + if (const PointerType *PT = BaseType->getAsPointerType()) + BaseType = PT->getPointeeType(); + else + return Diag(OpLoc, diag::err_typecheck_member_reference_arrow, + SourceRange(MemberLoc)); + } + // The base type is either a record or an OCUVectorType. + if (const RecordType *RTy = BaseType->getAsRecordType()) { + RecordDecl *RDecl = RTy->getDecl(); + if (RTy->isIncompleteType()) + return Diag(OpLoc, diag::err_typecheck_incomplete_tag, RDecl->getName(), + BaseExpr->getSourceRange()); + // The record definition is complete, now make sure the member is valid. 
+ FieldDecl *MemberDecl = RDecl->getMember(&Member); + if (!MemberDecl) + return Diag(OpLoc, diag::err_typecheck_no_member, Member.getName(), + SourceRange(MemberLoc)); + + // Figure out the type of the member; see C99 6.5.2.3p3 + // FIXME: Handle address space modifiers + QualType MemberType = MemberDecl->getType(); + unsigned combinedQualifiers = + MemberType.getCVRQualifiers() | BaseType.getCVRQualifiers(); + MemberType = MemberType.getQualifiedType(combinedQualifiers); + + return new MemberExpr(BaseExpr, OpKind==tok::arrow, MemberDecl, + MemberLoc, MemberType); + } else if (BaseType->isOCUVectorType() && OpKind == tok::period) { + // Component access limited to variables (reject vec4.rg.g). + if (!isa<DeclRefExpr>(BaseExpr)) + return Diag(OpLoc, diag::err_ocuvector_component_access, + SourceRange(MemberLoc)); + QualType ret = CheckOCUVectorComponent(BaseType, OpLoc, Member, MemberLoc); + if (ret.isNull()) + return true; + return new OCUVectorElementExpr(ret, BaseExpr, Member, MemberLoc); + } else if (BaseType->isObjCInterfaceType()) { + ObjCInterfaceDecl *IFace; + if (isa<ObjCInterfaceType>(BaseType.getCanonicalType())) + IFace = dyn_cast<ObjCInterfaceType>(BaseType)->getDecl(); + else + IFace = dyn_cast<ObjCQualifiedInterfaceType>(BaseType)->getDecl(); + ObjCInterfaceDecl *clsDeclared; + if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(&Member, clsDeclared)) + return new ObjCIvarRefExpr(IV, IV->getType(), MemberLoc, BaseExpr, + OpKind==tok::arrow); + } + return Diag(OpLoc, diag::err_typecheck_member_reference_structUnion, + SourceRange(MemberLoc)); +} + +/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. +/// This provides the location of the left/right parens and a list of comma +/// locations. 
+Action::ExprResult Sema:: +ActOnCallExpr(ExprTy *fn, SourceLocation LParenLoc, + ExprTy **args, unsigned NumArgs, + SourceLocation *CommaLocs, SourceLocation RParenLoc) { + Expr *Fn = static_cast<Expr *>(fn); + Expr **Args = reinterpret_cast<Expr**>(args); + assert(Fn && "no function call expression"); + + // Make the call expr early, before semantic checks. This guarantees cleanup + // of arguments and function on error. + llvm::OwningPtr<CallExpr> TheCall(new CallExpr(Fn, Args, NumArgs, + Context.BoolTy, RParenLoc)); + + // Promote the function operand. + TheCall->setCallee(UsualUnaryConversions(Fn)); + + // C99 6.5.2.2p1 - "The expression that denotes the called function shall have + // type pointer to function". + const PointerType *PT = Fn->getType()->getAsPointerType(); + if (PT == 0) + return Diag(Fn->getLocStart(), diag::err_typecheck_call_not_function, + SourceRange(Fn->getLocStart(), RParenLoc)); + const FunctionType *FuncT = PT->getPointeeType()->getAsFunctionType(); + if (FuncT == 0) + return Diag(Fn->getLocStart(), diag::err_typecheck_call_not_function, + SourceRange(Fn->getLocStart(), RParenLoc)); + + // We know the result type of the call, set it. + TheCall->setType(FuncT->getResultType()); + + if (const FunctionTypeProto *Proto = dyn_cast<FunctionTypeProto>(FuncT)) { + // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by + // assignment, to the types of the corresponding parameter, ... + unsigned NumArgsInProto = Proto->getNumArgs(); + unsigned NumArgsToCheck = NumArgs; + + // If too few arguments are available, don't make the call. + if (NumArgs < NumArgsInProto) + return Diag(RParenLoc, diag::err_typecheck_call_too_few_args, + Fn->getSourceRange()); + + // If too many are passed and not variadic, error on the extras and drop + // them. 
+ if (NumArgs > NumArgsInProto) { + if (!Proto->isVariadic()) { + Diag(Args[NumArgsInProto]->getLocStart(), + diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), + SourceRange(Args[NumArgsInProto]->getLocStart(), + Args[NumArgs-1]->getLocEnd())); + // This deletes the extra arguments. + TheCall->setNumArgs(NumArgsInProto); + } + NumArgsToCheck = NumArgsInProto; + } + + // Continue to check argument types (even if we have too few/many args). + for (unsigned i = 0; i != NumArgsToCheck; i++) { + Expr *Arg = Args[i]; + QualType ProtoArgType = Proto->getArgType(i); + QualType ArgType = Arg->getType(); + + // Compute implicit casts from the operand to the formal argument type. + AssignConvertType ConvTy = + CheckSingleAssignmentConstraints(ProtoArgType, Arg); + TheCall->setArg(i, Arg); + + if (DiagnoseAssignmentResult(ConvTy, Arg->getLocStart(), ProtoArgType, + ArgType, Arg, "passing")) + return true; + } + + // If this is a variadic call, handle args passed through "...". + if (Proto->isVariadic()) { + // Promote the arguments (C99 6.5.2.2p7). + for (unsigned i = NumArgsInProto; i != NumArgs; i++) { + Expr *Arg = Args[i]; + DefaultArgumentPromotion(Arg); + TheCall->setArg(i, Arg); + } + } + } else { + assert(isa<FunctionTypeNoProto>(FuncT) && "Unknown FunctionType!"); + + // Promote the arguments (C99 6.5.2.2p6). + for (unsigned i = 0; i != NumArgs; i++) { + Expr *Arg = Args[i]; + DefaultArgumentPromotion(Arg); + TheCall->setArg(i, Arg); + } + } + + // Do special checking on direct calls to functions. 
+ if (ImplicitCastExpr *IcExpr = dyn_cast<ImplicitCastExpr>(Fn)) + if (DeclRefExpr *DRExpr = dyn_cast<DeclRefExpr>(IcExpr->getSubExpr())) + if (FunctionDecl *FDecl = dyn_cast<FunctionDecl>(DRExpr->getDecl())) + if (CheckFunctionCall(FDecl, TheCall.get())) + return true; + + return TheCall.take(); +} + +Action::ExprResult Sema:: +ActOnCompoundLiteral(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *InitExpr) { + assert((Ty != 0) && "ActOnCompoundLiteral(): missing type"); + QualType literalType = QualType::getFromOpaquePtr(Ty); + // FIXME: put back this assert when initializers are worked out. + //assert((InitExpr != 0) && "ActOnCompoundLiteral(): missing expression"); + Expr *literalExpr = static_cast<Expr*>(InitExpr); + + // FIXME: add more semantic analysis (C99 6.5.2.5). + if (CheckInitializerTypes(literalExpr, literalType)) + return true; + + bool isFileScope = !CurFunctionDecl && !CurMethodDecl; + if (isFileScope) { // 6.5.2.5p3 + if (CheckForConstantInitializer(literalExpr, literalType)) + return true; + } + return new CompoundLiteralExpr(LParenLoc, literalType, literalExpr, isFileScope); +} + +Action::ExprResult Sema:: +ActOnInitList(SourceLocation LBraceLoc, ExprTy **initlist, unsigned NumInit, + SourceLocation RBraceLoc) { + Expr **InitList = reinterpret_cast<Expr**>(initlist); + + // Semantic analysis for initializers is done by ActOnDeclarator() and + // CheckInitializer() - it requires knowledge of the object being intialized. + + InitListExpr *e = new InitListExpr(LBraceLoc, InitList, NumInit, RBraceLoc); + e->setType(Context.VoidTy); // FIXME: just a place holder for now. + return e; +} + +bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty) { + assert(VectorTy->isVectorType() && "Not a vector type!"); + + if (Ty->isVectorType() || Ty->isIntegerType()) { + if (Context.getTypeSize(VectorTy) != Context.getTypeSize(Ty)) + return Diag(R.getBegin(), + Ty->isVectorType() ? 
+ diag::err_invalid_conversion_between_vectors : + diag::err_invalid_conversion_between_vector_and_integer, + VectorTy.getAsString().c_str(), + Ty.getAsString().c_str(), R); + } else + return Diag(R.getBegin(), + diag::err_invalid_conversion_between_vector_and_scalar, + VectorTy.getAsString().c_str(), + Ty.getAsString().c_str(), R); + + return false; +} + +Action::ExprResult Sema:: +ActOnCastExpr(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op) { + assert((Ty != 0) && (Op != 0) && "ActOnCastExpr(): missing type or expr"); + + Expr *castExpr = static_cast<Expr*>(Op); + QualType castType = QualType::getFromOpaquePtr(Ty); + + UsualUnaryConversions(castExpr); + + // C99 6.5.4p2: the cast type needs to be void or scalar and the expression + // type needs to be scalar. + if (!castType->isVoidType()) { // Cast to void allows any expr type. + if (!castType->isScalarType() && !castType->isVectorType()) + return Diag(LParenLoc, diag::err_typecheck_cond_expect_scalar, + castType.getAsString(), SourceRange(LParenLoc, RParenLoc)); + if (!castExpr->getType()->isScalarType() && + !castExpr->getType()->isVectorType()) + return Diag(castExpr->getLocStart(), + diag::err_typecheck_expect_scalar_operand, + castExpr->getType().getAsString(),castExpr->getSourceRange()); + + if (castExpr->getType()->isVectorType()) { + if (CheckVectorCast(SourceRange(LParenLoc, RParenLoc), + castExpr->getType(), castType)) + return true; + } else if (castType->isVectorType()) { + if (CheckVectorCast(SourceRange(LParenLoc, RParenLoc), + castType, castExpr->getType())) + return true; + } + } + return new CastExpr(castType, castExpr, LParenLoc); +} + +/// Note that lex is not null here, even if this is the gnu "x ?: y" extension. +/// In that case, lex = cond. 
+inline QualType Sema::CheckConditionalOperands( // C99 6.5.15 + Expr *&cond, Expr *&lex, Expr *&rex, SourceLocation questionLoc) { + UsualUnaryConversions(cond); + UsualUnaryConversions(lex); + UsualUnaryConversions(rex); + QualType condT = cond->getType(); + QualType lexT = lex->getType(); + QualType rexT = rex->getType(); + + // first, check the condition. + if (!condT->isScalarType()) { // C99 6.5.15p2 + Diag(cond->getLocStart(), diag::err_typecheck_cond_expect_scalar, + condT.getAsString()); + return QualType(); + } + + // Now check the two expressions. + + // If both operands have arithmetic type, do the usual arithmetic conversions + // to find a common type: C99 6.5.15p3,5. + if (lexT->isArithmeticType() && rexT->isArithmeticType()) { + UsualArithmeticConversions(lex, rex); + return lex->getType(); + } + + // If both operands are the same structure or union type, the result is that + // type. + if (const RecordType *LHSRT = lexT->getAsRecordType()) { // C99 6.5.15p3 + if (const RecordType *RHSRT = rexT->getAsRecordType()) + if (LHSRT->getDecl() == RHSRT->getDecl()) + // "If both the operands have structure or union type, the result has + // that type." This implies that CV qualifiers are dropped. + return lexT.getUnqualifiedType(); + } + + // C99 6.5.15p5: "If both operands have void type, the result has void type." + if (lexT->isVoidType() && rexT->isVoidType()) + return lexT.getUnqualifiedType(); + + // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has + // the type of the other operand." + if (lexT->isPointerType() && rex->isNullPointerConstant(Context)) { + ImpCastExprToType(rex, lexT); // promote the null to a pointer. + return lexT; + } + if (rexT->isPointerType() && lex->isNullPointerConstant(Context)) { + ImpCastExprToType(lex, rexT); // promote the null to a pointer. 
+ return rexT; + } + // Handle the case where both operands are pointers before we handle null + // pointer constants in case both operands are null pointer constants. + if (const PointerType *LHSPT = lexT->getAsPointerType()) { // C99 6.5.15p3,6 + if (const PointerType *RHSPT = rexT->getAsPointerType()) { + // get the "pointed to" types + QualType lhptee = LHSPT->getPointeeType(); + QualType rhptee = RHSPT->getPointeeType(); + + // ignore qualifiers on void (C99 6.5.15p3, clause 6) + if (lhptee->isVoidType() && + (rhptee->isObjectType() || rhptee->isIncompleteType())) { + // Figure out necessary qualifiers (C99 6.5.15p6) + QualType destPointee=lhptee.getQualifiedType(rhptee.getCVRQualifiers()); + QualType destType = Context.getPointerType(destPointee); + ImpCastExprToType(lex, destType); // add qualifiers if necessary + ImpCastExprToType(rex, destType); // promote to void* + return destType; + } + if (rhptee->isVoidType() && + (lhptee->isObjectType() || lhptee->isIncompleteType())) { + QualType destPointee=rhptee.getQualifiedType(lhptee.getCVRQualifiers()); + QualType destType = Context.getPointerType(destPointee); + ImpCastExprToType(lex, destType); // add qualifiers if necessary + ImpCastExprToType(rex, destType); // promote to void* + return destType; + } + + if (!Context.typesAreCompatible(lhptee.getUnqualifiedType(), + rhptee.getUnqualifiedType())) { + Diag(questionLoc, diag::warn_typecheck_cond_incompatible_pointers, + lexT.getAsString(), rexT.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + // In this situation, we assume void* type. No especially good + // reason, but this is what gcc does, and we do have to pick + // to get a consistent AST. + QualType voidPtrTy = Context.getPointerType(Context.VoidTy); + ImpCastExprToType(lex, voidPtrTy); + ImpCastExprToType(rex, voidPtrTy); + return voidPtrTy; + } + // The pointer types are compatible. 
+ // C99 6.5.15p6: If both operands are pointers to compatible types *or* to + // differently qualified versions of compatible types, the result type is + // a pointer to an appropriately qualified version of the *composite* + // type. + // FIXME: Need to return the composite type. + // FIXME: Need to add qualifiers + return lexT; + } + } + + // Otherwise, the operands are not compatible. + Diag(questionLoc, diag::err_typecheck_cond_incompatible_operands, + lexT.getAsString(), rexT.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + return QualType(); +} + +/// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null +/// in the case of a the GNU conditional expr extension. +Action::ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc, + SourceLocation ColonLoc, + ExprTy *Cond, ExprTy *LHS, + ExprTy *RHS) { + Expr *CondExpr = (Expr *) Cond; + Expr *LHSExpr = (Expr *) LHS, *RHSExpr = (Expr *) RHS; + + // If this is the gnu "x ?: y" extension, analyze the types as though the LHS + // was the condition. + bool isLHSNull = LHSExpr == 0; + if (isLHSNull) + LHSExpr = CondExpr; + + QualType result = CheckConditionalOperands(CondExpr, LHSExpr, + RHSExpr, QuestionLoc); + if (result.isNull()) + return true; + return new ConditionalOperator(CondExpr, isLHSNull ? 0 : LHSExpr, + RHSExpr, result); +} + +/// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that +/// do not have a prototype. Arguments that have type float are promoted to +/// double. All other argument types are converted by UsualUnaryConversions(). +void Sema::DefaultArgumentPromotion(Expr *&Expr) { + QualType Ty = Expr->getType(); + assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type"); + + if (Ty == Context.FloatTy) + ImpCastExprToType(Expr, Context.DoubleTy); + else + UsualUnaryConversions(Expr); +} + +/// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). 
+void Sema::DefaultFunctionArrayConversion(Expr *&e) { + QualType t = e->getType(); + assert(!t.isNull() && "DefaultFunctionArrayConversion - missing type"); + + if (const ReferenceType *ref = t->getAsReferenceType()) { + ImpCastExprToType(e, ref->getReferenceeType()); // C++ [expr] + t = e->getType(); + } + if (t->isFunctionType()) + ImpCastExprToType(e, Context.getPointerType(t)); + else if (const ArrayType *ary = t->getAsArrayType()) { + // Make sure we don't lose qualifiers when dealing with typedefs. Example: + // typedef int arr[10]; + // void test2() { + // const arr b; + // b[4] = 1; + // } + QualType ELT = ary->getElementType(); + // FIXME: Handle ASQualType + ELT = ELT.getQualifiedType(t.getCVRQualifiers()|ELT.getCVRQualifiers()); + ImpCastExprToType(e, Context.getPointerType(ELT)); + } +} + +/// UsualUnaryConversions - Performs various conversions that are common to most +/// operators (C99 6.3). The conversions of array and function types are +/// sometimes surpressed. For example, the array->pointer conversion doesn't +/// apply if the array is an argument to the sizeof or address (&) operators. +/// In these instances, this routine should *not* be called. +Expr *Sema::UsualUnaryConversions(Expr *&Expr) { + QualType Ty = Expr->getType(); + assert(!Ty.isNull() && "UsualUnaryConversions - missing type"); + + if (const ReferenceType *Ref = Ty->getAsReferenceType()) { + ImpCastExprToType(Expr, Ref->getReferenceeType()); // C++ [expr] + Ty = Expr->getType(); + } + if (Ty->isPromotableIntegerType()) // C99 6.3.1.1p2 + ImpCastExprToType(Expr, Context.IntTy); + else + DefaultFunctionArrayConversion(Expr); + + return Expr; +} + +/// UsualArithmeticConversions - Performs various conversions that are common to +/// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this +/// routine returns the first non-arithmetic type found. The client is +/// responsible for emitting appropriate error diagnostics. 
+/// FIXME: verify the conversion rules for "complex int" are consistent with GCC. +QualType Sema::UsualArithmeticConversions(Expr *&lhsExpr, Expr *&rhsExpr, + bool isCompAssign) { + if (!isCompAssign) { + UsualUnaryConversions(lhsExpr); + UsualUnaryConversions(rhsExpr); + } + // For conversion purposes, we ignore any qualifiers. + // For example, "const float" and "float" are equivalent. + QualType lhs = lhsExpr->getType().getCanonicalType().getUnqualifiedType(); + QualType rhs = rhsExpr->getType().getCanonicalType().getUnqualifiedType(); + + // If both types are identical, no conversion is needed. + if (lhs == rhs) + return lhs; + + // If either side is a non-arithmetic type (e.g. a pointer), we are done. + // The caller can deal with this (e.g. pointer + int). + if (!lhs->isArithmeticType() || !rhs->isArithmeticType()) + return lhs; + + // At this point, we have two different arithmetic types. + + // Handle complex types first (C99 6.3.1.8p1). + if (lhs->isComplexType() || rhs->isComplexType()) { + // if we have an integer operand, the result is the complex type. + if (rhs->isIntegerType() || rhs->isComplexIntegerType()) { + // convert the rhs to the lhs complex type. + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + if (lhs->isIntegerType() || lhs->isComplexIntegerType()) { + // convert the lhs to the rhs complex type. + if (!isCompAssign) ImpCastExprToType(lhsExpr, rhs); + return rhs; + } + // This handles complex/complex, complex/float, or float/complex. + // When both operands are complex, the shorter operand is converted to the + // type of the longer, and that is the type of the result. This corresponds + // to what is done when combining two real floating-point operands. + // The fun begins when size promotion occur across type domains. 
+ // From H&S 6.3.4: When one operand is complex and the other is a real + // floating-point type, the less precise type is converted, within it's + // real or complex domain, to the precision of the other type. For example, + // when combining a "long double" with a "double _Complex", the + // "double _Complex" is promoted to "long double _Complex". + int result = Context.compareFloatingType(lhs, rhs); + + if (result > 0) { // The left side is bigger, convert rhs. + rhs = Context.getFloatingTypeOfSizeWithinDomain(lhs, rhs); + if (!isCompAssign) + ImpCastExprToType(rhsExpr, rhs); + } else if (result < 0) { // The right side is bigger, convert lhs. + lhs = Context.getFloatingTypeOfSizeWithinDomain(rhs, lhs); + if (!isCompAssign) + ImpCastExprToType(lhsExpr, lhs); + } + // At this point, lhs and rhs have the same rank/size. Now, make sure the + // domains match. This is a requirement for our implementation, C99 + // does not require this promotion. + if (lhs != rhs) { // Domains don't match, we have complex/float mix. + if (lhs->isRealFloatingType()) { // handle "double, _Complex double". + if (!isCompAssign) + ImpCastExprToType(lhsExpr, rhs); + return rhs; + } else { // handle "_Complex double, double". + if (!isCompAssign) + ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + } + return lhs; // The domain/size match exactly. + } + // Now handle "real" floating types (i.e. float, double, long double). + if (lhs->isRealFloatingType() || rhs->isRealFloatingType()) { + // if we have an integer operand, the result is the real floating type. + if (rhs->isIntegerType() || rhs->isComplexIntegerType()) { + // convert rhs to the lhs floating point type. + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + if (lhs->isIntegerType() || lhs->isComplexIntegerType()) { + // convert lhs to the rhs floating point type. 
+ if (!isCompAssign) ImpCastExprToType(lhsExpr, rhs); + return rhs; + } + // We have two real floating types, float/complex combos were handled above. + // Convert the smaller operand to the bigger result. + int result = Context.compareFloatingType(lhs, rhs); + + if (result > 0) { // convert the rhs + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + if (result < 0) { // convert the lhs + if (!isCompAssign) ImpCastExprToType(lhsExpr, rhs); // convert the lhs + return rhs; + } + assert(0 && "Sema::UsualArithmeticConversions(): illegal float comparison"); + } + if (lhs->isComplexIntegerType() || rhs->isComplexIntegerType()) { + // Handle GCC complex int extension. + const ComplexType *lhsComplexInt = lhs->getAsComplexIntegerType(); + const ComplexType *rhsComplexInt = rhs->getAsComplexIntegerType(); + + if (lhsComplexInt && rhsComplexInt) { + if (Context.maxIntegerType(lhsComplexInt->getElementType(), + rhsComplexInt->getElementType()) == lhs) { + // convert the rhs + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + if (!isCompAssign) + ImpCastExprToType(lhsExpr, rhs); // convert the lhs + return rhs; + } else if (lhsComplexInt && rhs->isIntegerType()) { + // convert the rhs to the lhs complex type. + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } else if (rhsComplexInt && lhs->isIntegerType()) { + // convert the lhs to the rhs complex type. + if (!isCompAssign) ImpCastExprToType(lhsExpr, rhs); + return rhs; + } + } + // Finally, we have two differing integer types. + if (Context.maxIntegerType(lhs, rhs) == lhs) { // convert the rhs + if (!isCompAssign) ImpCastExprToType(rhsExpr, lhs); + return lhs; + } + if (!isCompAssign) ImpCastExprToType(lhsExpr, rhs); // convert the lhs + return rhs; +} + +// CheckPointerTypesForAssignment - This is a very tricky routine (despite +// being closely modeled after the C99 spec:-). 
The odd characteristic of this +// routine is it effectively iqnores the qualifiers on the top level pointee. +// This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. +// FIXME: add a couple examples in this comment. +Sema::AssignConvertType +Sema::CheckPointerTypesForAssignment(QualType lhsType, QualType rhsType) { + QualType lhptee, rhptee; + + // get the "pointed to" type (ignoring qualifiers at the top level) + lhptee = lhsType->getAsPointerType()->getPointeeType(); + rhptee = rhsType->getAsPointerType()->getPointeeType(); + + // make sure we operate on the canonical type + lhptee = lhptee.getCanonicalType(); + rhptee = rhptee.getCanonicalType(); + + AssignConvertType ConvTy = Compatible; + + // C99 6.5.16.1p1: This following citation is common to constraints + // 3 & 4 (below). ...and the type *pointed to* by the left has all the + // qualifiers of the type *pointed to* by the right; + // FIXME: Handle ASQualType + if ((lhptee.getCVRQualifiers() & rhptee.getCVRQualifiers()) != + rhptee.getCVRQualifiers()) + ConvTy = CompatiblePointerDiscardsQualifiers; + + // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or + // incomplete type and the other is a pointer to a qualified or unqualified + // version of void... + if (lhptee->isVoidType()) { + if (rhptee->isObjectType() || rhptee->isIncompleteType()) + return ConvTy; + + // As an extension, we allow cast to/from void* to function pointer. + if (rhptee->isFunctionType()) + return FunctionVoidPointer; + } + + if (rhptee->isVoidType()) { + if (lhptee->isObjectType() || lhptee->isIncompleteType()) + return ConvTy; + + // As an extension, we allow cast to/from void* to function pointer. + if (lhptee->isFunctionType()) + return FunctionVoidPointer; + } + + // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or + // unqualified versions of compatible types, ... 
+ if (!Context.typesAreCompatible(lhptee.getUnqualifiedType(), + rhptee.getUnqualifiedType())) + return IncompatiblePointer; // this "trumps" PointerAssignDiscardsQualifiers + return ConvTy; +} + +/// CheckAssignmentConstraints (C99 6.5.16) - This routine currently +/// has code to accommodate several GCC extensions when type checking +/// pointers. Here are some objectionable examples that GCC considers warnings: +/// +/// int a, *pint; +/// short *pshort; +/// struct foo *pfoo; +/// +/// pint = pshort; // warning: assignment from incompatible pointer type +/// a = pint; // warning: assignment makes integer from pointer without a cast +/// pint = a; // warning: assignment makes pointer from integer without a cast +/// pint = pfoo; // warning: assignment from incompatible pointer type +/// +/// As a result, the code for dealing with pointers is more complex than the +/// C99 spec dictates. +/// Note: the warning above turn into errors when -pedantic-errors is enabled. +/// +Sema::AssignConvertType +Sema::CheckAssignmentConstraints(QualType lhsType, QualType rhsType) { + // Get canonical types. We're not formatting these types, just comparing + // them. + lhsType = lhsType.getCanonicalType(); + rhsType = rhsType.getCanonicalType(); + + if (lhsType.getUnqualifiedType() == rhsType.getUnqualifiedType()) + return Compatible; // Common case: fast path an exact match. 
+ + if (lhsType->isReferenceType() || rhsType->isReferenceType()) { + if (Context.referenceTypesAreCompatible(lhsType, rhsType)) + return Compatible; + return Incompatible; + } + + if (lhsType->isObjCQualifiedIdType() + || rhsType->isObjCQualifiedIdType()) { + if (Context.ObjCQualifiedIdTypesAreCompatible(lhsType, rhsType)) + return Compatible; + return Incompatible; + } + + if (lhsType->isVectorType() || rhsType->isVectorType()) { + // For OCUVector, allow vector splats; float -> <n x float> + if (const OCUVectorType *LV = lhsType->getAsOCUVectorType()) { + if (LV->getElementType().getTypePtr() == rhsType.getTypePtr()) + return Compatible; + } + + // If LHS and RHS are both vectors of integer or both vectors of floating + // point types, and the total vector length is the same, allow the + // conversion. This is a bitcast; no bits are changed but the result type + // is different. + if (getLangOptions().LaxVectorConversions && + lhsType->isVectorType() && rhsType->isVectorType()) { + if ((lhsType->isIntegerType() && rhsType->isIntegerType()) || + (lhsType->isRealFloatingType() && rhsType->isRealFloatingType())) { + if (Context.getTypeSize(lhsType) == Context.getTypeSize(rhsType)) + return Compatible; + } + } + return Incompatible; + } + + if (lhsType->isArithmeticType() && rhsType->isArithmeticType()) + return Compatible; + + if (lhsType->isPointerType()) { + if (rhsType->isIntegerType()) + return IntToPointer; + + if (rhsType->isPointerType()) + return CheckPointerTypesForAssignment(lhsType, rhsType); + return Incompatible; + } + + if (rhsType->isPointerType()) { + // C99 6.5.16.1p1: the left operand is _Bool and the right is a pointer. 
+ if ((lhsType->isIntegerType()) && (lhsType != Context.BoolTy)) + return PointerToInt; + + if (lhsType->isPointerType()) + return CheckPointerTypesForAssignment(lhsType, rhsType); + return Incompatible; + } + + if (isa<TagType>(lhsType) && isa<TagType>(rhsType)) { + if (Context.tagTypesAreCompatible(lhsType, rhsType)) + return Compatible; + } + return Incompatible; +} + +Sema::AssignConvertType +Sema::CheckSingleAssignmentConstraints(QualType lhsType, Expr *&rExpr) { + // C99 6.5.16.1p1: the left operand is a pointer and the right is + // a null pointer constant. + if ((lhsType->isPointerType() || lhsType->isObjCQualifiedIdType()) + && rExpr->isNullPointerConstant(Context)) { + ImpCastExprToType(rExpr, lhsType); + return Compatible; + } + // This check seems unnatural, however it is necessary to ensure the proper + // conversion of functions/arrays. If the conversion were done for all + // DeclExpr's (created by ActOnIdentifierExpr), it would mess up the unary + // expressions that surpress this implicit conversion (&, sizeof). + // + // Suppress this for references: C99 8.5.3p5. FIXME: revisit when references + // are better understood. + if (!lhsType->isReferenceType()) + DefaultFunctionArrayConversion(rExpr); + + Sema::AssignConvertType result = + CheckAssignmentConstraints(lhsType, rExpr->getType()); + + // C99 6.5.16.1p2: The value of the right operand is converted to the + // type of the assignment expression. 
+ if (rExpr->getType() != lhsType) + ImpCastExprToType(rExpr, lhsType); + return result; +} + +Sema::AssignConvertType +Sema::CheckCompoundAssignmentConstraints(QualType lhsType, QualType rhsType) { + return CheckAssignmentConstraints(lhsType, rhsType); +} + +QualType Sema::InvalidOperands(SourceLocation loc, Expr *&lex, Expr *&rex) { + Diag(loc, diag::err_typecheck_invalid_operands, + lex->getType().getAsString(), rex->getType().getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + return QualType(); +} + +inline QualType Sema::CheckVectorOperands(SourceLocation loc, Expr *&lex, + Expr *&rex) { + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + // make sure the vector types are identical. + if (lhsType == rhsType) + return lhsType; + + // if the lhs is an ocu vector and the rhs is a scalar of the same type, + // promote the rhs to the vector type. + if (const OCUVectorType *V = lhsType->getAsOCUVectorType()) { + if (V->getElementType().getCanonicalType().getTypePtr() + == rhsType.getCanonicalType().getTypePtr()) { + ImpCastExprToType(rex, lhsType); + return lhsType; + } + } + + // if the rhs is an ocu vector and the lhs is a scalar of the same type, + // promote the lhs to the vector type. + if (const OCUVectorType *V = rhsType->getAsOCUVectorType()) { + if (V->getElementType().getCanonicalType().getTypePtr() + == lhsType.getCanonicalType().getTypePtr()) { + ImpCastExprToType(lex, rhsType); + return rhsType; + } + } + + // You cannot convert between vector values of different size. 
+ Diag(loc, diag::err_typecheck_vector_not_convertable, + lex->getType().getAsString(), rex->getType().getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + return QualType(); +} + +inline QualType Sema::CheckMultiplyDivideOperands( + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + if (lhsType->isVectorType() || rhsType->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + + QualType compType = UsualArithmeticConversions(lex, rex, isCompAssign); + + if (lex->getType()->isArithmeticType() && rex->getType()->isArithmeticType()) + return compType; + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckRemainderOperands( + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + QualType compType = UsualArithmeticConversions(lex, rex, isCompAssign); + + if (lex->getType()->isIntegerType() && rex->getType()->isIntegerType()) + return compType; + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckAdditionOperands( // C99 6.5.6 + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) +{ + if (lex->getType()->isVectorType() || rex->getType()->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + + QualType compType = UsualArithmeticConversions(lex, rex, isCompAssign); + + // handle the common case first (both operands are arithmetic). 
+ if (lex->getType()->isArithmeticType() && rex->getType()->isArithmeticType()) + return compType; + + if (lex->getType()->isPointerType() && rex->getType()->isIntegerType()) + return lex->getType(); + if (lex->getType()->isIntegerType() && rex->getType()->isPointerType()) + return rex->getType(); + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckSubtractionOperands( // C99 6.5.6 + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) +{ + if (lex->getType()->isVectorType() || rex->getType()->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + + QualType compType = UsualArithmeticConversions(lex, rex, isCompAssign); + + // Enforce type constraints: C99 6.5.6p3. + + // Handle the common case first (both operands are arithmetic). + if (lex->getType()->isArithmeticType() && rex->getType()->isArithmeticType()) + return compType; + + // Either ptr - int or ptr - ptr. + if (const PointerType *LHSPTy = lex->getType()->getAsPointerType()) { + QualType lpointee = LHSPTy->getPointeeType(); + + // The LHS must be an object type, not incomplete, function, etc. + if (!lpointee->isObjectType()) { + // Handle the GNU void* extension. + if (lpointee->isVoidType()) { + Diag(loc, diag::ext_gnu_void_ptr, + lex->getSourceRange(), rex->getSourceRange()); + } else { + Diag(loc, diag::err_typecheck_sub_ptr_object, + lex->getType().getAsString(), lex->getSourceRange()); + return QualType(); + } + } + + // The result type of a pointer-int computation is the pointer type. + if (rex->getType()->isIntegerType()) + return lex->getType(); + + // Handle pointer-pointer subtractions. + if (const PointerType *RHSPTy = rex->getType()->getAsPointerType()) { + QualType rpointee = RHSPTy->getPointeeType(); + + // RHS must be an object type, unless void (GNU). + if (!rpointee->isObjectType()) { + // Handle the GNU void* extension. 
+ if (rpointee->isVoidType()) { + if (!lpointee->isVoidType()) + Diag(loc, diag::ext_gnu_void_ptr, + lex->getSourceRange(), rex->getSourceRange()); + } else { + Diag(loc, diag::err_typecheck_sub_ptr_object, + rex->getType().getAsString(), rex->getSourceRange()); + return QualType(); + } + } + + // Pointee types must be compatible. + if (!Context.typesAreCompatible(lpointee.getUnqualifiedType(), + rpointee.getUnqualifiedType())) { + Diag(loc, diag::err_typecheck_sub_ptr_compatible, + lex->getType().getAsString(), rex->getType().getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + return QualType(); + } + + return Context.getPointerDiffType(); + } + } + + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckShiftOperands( // C99 6.5.7 + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) { + // C99 6.5.7p2: Each of the operands shall have integer type. + if (!lex->getType()->isIntegerType() || !rex->getType()->isIntegerType()) + return InvalidOperands(loc, lex, rex); + + // Shifts don't perform usual arithmetic conversions, they just do integer + // promotions on each operand. C99 6.5.7p3 + if (!isCompAssign) + UsualUnaryConversions(lex); + UsualUnaryConversions(rex); + + // "The type of the result is that of the promoted left operand." + return lex->getType(); +} + +inline QualType Sema::CheckCompareOperands( // C99 6.5.8 + Expr *&lex, Expr *&rex, SourceLocation loc, bool isRelational) +{ + // C99 6.5.8p3 / C99 6.5.9p4 + if (lex->getType()->isArithmeticType() && rex->getType()->isArithmeticType()) + UsualArithmeticConversions(lex, rex); + else { + UsualUnaryConversions(lex); + UsualUnaryConversions(rex); + } + QualType lType = lex->getType(); + QualType rType = rex->getType(); + + // For non-floating point types, check for self-comparisons of the form + // x == x, x != x, x < x, etc. These always evaluate to a constant, and + // often indicate logic errors in the program. 
+ if (!lType->isFloatingType()) { + if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(lex->IgnoreParens())) + if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(rex->IgnoreParens())) + if (DRL->getDecl() == DRR->getDecl()) + Diag(loc, diag::warn_selfcomparison); + } + + if (isRelational) { + if (lType->isRealType() && rType->isRealType()) + return Context.IntTy; + } else { + // Check for comparisons of floating point operands using != and ==. + if (lType->isFloatingType()) { + assert (rType->isFloatingType()); + CheckFloatComparison(loc,lex,rex); + } + + if (lType->isArithmeticType() && rType->isArithmeticType()) + return Context.IntTy; + } + + bool LHSIsNull = lex->isNullPointerConstant(Context); + bool RHSIsNull = rex->isNullPointerConstant(Context); + + // All of the following pointer related warnings are GCC extensions, except + // when handling null pointer constants. One day, we can consider making them + // errors (when -pedantic-errors is enabled). + if (lType->isPointerType() && rType->isPointerType()) { // C99 6.5.8p2 + QualType lpointee = lType->getAsPointerType()->getPointeeType(); + QualType rpointee = rType->getAsPointerType()->getPointeeType(); + + if (!LHSIsNull && !RHSIsNull && // C99 6.5.9p2 + !lpointee->isVoidType() && !lpointee->isVoidType() && + !Context.typesAreCompatible(lpointee.getUnqualifiedType(), + rpointee.getUnqualifiedType())) { + Diag(loc, diag::ext_typecheck_comparison_of_distinct_pointers, + lType.getAsString(), rType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + } + ImpCastExprToType(rex, lType); // promote the pointer to pointer + return Context.IntTy; + } + if ((lType->isObjCQualifiedIdType() || rType->isObjCQualifiedIdType()) + && Context.ObjCQualifiedIdTypesAreCompatible(lType, rType, true)) { + ImpCastExprToType(rex, lType); + return Context.IntTy; + } + if (lType->isPointerType() && rType->isIntegerType()) { + if (!RHSIsNull) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lType.getAsString(), 
rType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + ImpCastExprToType(rex, lType); // promote the integer to pointer + return Context.IntTy; + } + if (lType->isIntegerType() && rType->isPointerType()) { + if (!LHSIsNull) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lType.getAsString(), rType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + ImpCastExprToType(lex, rType); // promote the integer to pointer + return Context.IntTy; + } + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckBitwiseOperands( + Expr *&lex, Expr *&rex, SourceLocation loc, bool isCompAssign) +{ + if (lex->getType()->isVectorType() || rex->getType()->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + + QualType compType = UsualArithmeticConversions(lex, rex, isCompAssign); + + if (lex->getType()->isIntegerType() && rex->getType()->isIntegerType()) + return compType; + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckLogicalOperands( // C99 6.5.[13,14] + Expr *&lex, Expr *&rex, SourceLocation loc) +{ + UsualUnaryConversions(lex); + UsualUnaryConversions(rex); + + if (lex->getType()->isScalarType() || rex->getType()->isScalarType()) + return Context.IntTy; + return InvalidOperands(loc, lex, rex); +} + +inline QualType Sema::CheckAssignmentOperands( // C99 6.5.16.1 + Expr *lex, Expr *&rex, SourceLocation loc, QualType compoundType) +{ + QualType lhsType = lex->getType(); + QualType rhsType = compoundType.isNull() ? 
rex->getType() : compoundType; + Expr::isModifiableLvalueResult mlval = lex->isModifiableLvalue(); + + switch (mlval) { // C99 6.5.16p2 + case Expr::MLV_Valid: + break; + case Expr::MLV_ConstQualified: + Diag(loc, diag::err_typecheck_assign_const, lex->getSourceRange()); + return QualType(); + case Expr::MLV_ArrayType: + Diag(loc, diag::err_typecheck_array_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + case Expr::MLV_NotObjectType: + Diag(loc, diag::err_typecheck_non_object_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + case Expr::MLV_InvalidExpression: + Diag(loc, diag::err_typecheck_expression_not_modifiable_lvalue, + lex->getSourceRange()); + return QualType(); + case Expr::MLV_IncompleteType: + case Expr::MLV_IncompleteVoidType: + Diag(loc, diag::err_typecheck_incomplete_type_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + case Expr::MLV_DuplicateVectorComponents: + Diag(loc, diag::err_typecheck_duplicate_vector_components_not_mlvalue, + lex->getSourceRange()); + return QualType(); + } + + AssignConvertType ConvTy; + if (compoundType.isNull()) + ConvTy = CheckSingleAssignmentConstraints(lhsType, rex); + else + ConvTy = CheckCompoundAssignmentConstraints(lhsType, rhsType); + + if (DiagnoseAssignmentResult(ConvTy, loc, lhsType, rhsType, + rex, "assigning")) + return QualType(); + + // C99 6.5.16p3: The type of an assignment expression is the type of the + // left operand unless the left operand has qualified type, in which case + // it is the unqualified version of the type of the left operand. + // C99 6.5.16.1p2: In simple assignment, the value of the right operand + // is converted to the type of the assignment expression (above). + // C++ 5.17p1: the type of the assignment expression is that of its left + // oprdu. 
+ return lhsType.getUnqualifiedType(); +} + +inline QualType Sema::CheckCommaOperands( // C99 6.5.17 + Expr *&lex, Expr *&rex, SourceLocation loc) { + UsualUnaryConversions(rex); + return rex->getType(); +} + +/// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine +/// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions. +QualType Sema::CheckIncrementDecrementOperand(Expr *op, SourceLocation OpLoc) { + QualType resType = op->getType(); + assert(!resType.isNull() && "no type for increment/decrement expression"); + + // C99 6.5.2.4p1: We allow complex as a GCC extension. + if (const PointerType *pt = resType->getAsPointerType()) { + if (!pt->getPointeeType()->isObjectType()) { // C99 6.5.2.4p2, 6.5.6p2 + Diag(OpLoc, diag::err_typecheck_arithmetic_incomplete_type, + resType.getAsString(), op->getSourceRange()); + return QualType(); + } + } else if (!resType->isRealType()) { + if (resType->isComplexType()) + // C99 does not support ++/-- on complex types. + Diag(OpLoc, diag::ext_integer_increment_complex, + resType.getAsString(), op->getSourceRange()); + else { + Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement, + resType.getAsString(), op->getSourceRange()); + return QualType(); + } + } + // At this point, we know we have a real, complex or pointer type. + // Now make sure the operand is a modifiable lvalue. + Expr::isModifiableLvalueResult mlval = op->isModifiableLvalue(); + if (mlval != Expr::MLV_Valid) { + // FIXME: emit a more precise diagnostic... + Diag(OpLoc, diag::err_typecheck_invalid_lvalue_incr_decr, + op->getSourceRange()); + return QualType(); + } + return resType; +} + +/// getPrimaryDecl - Helper function for CheckAddressOfOperand(). +/// This routine allows us to typecheck complex/recursive expressions +/// where the declaration is needed for type checking. Here are some +/// examples: &s.xx, &s.zz[1].yy, &(1+2), &(XX), &"123"[2]. 
+static ValueDecl *getPrimaryDecl(Expr *e) { + switch (e->getStmtClass()) { + case Stmt::DeclRefExprClass: + return cast<DeclRefExpr>(e)->getDecl(); + case Stmt::MemberExprClass: + // Fields cannot be declared with a 'register' storage class. + // &X->f is always ok, even if X is declared register. + if (cast<MemberExpr>(e)->isArrow()) + return 0; + return getPrimaryDecl(cast<MemberExpr>(e)->getBase()); + case Stmt::ArraySubscriptExprClass: { + // &X[4] and &4[X] is invalid if X is invalid and X is not a pointer. + + ValueDecl *VD = getPrimaryDecl(cast<ArraySubscriptExpr>(e)->getBase()); + if (!VD || VD->getType()->isPointerType()) + return 0; + else + return VD; + } + case Stmt::UnaryOperatorClass: + return getPrimaryDecl(cast<UnaryOperator>(e)->getSubExpr()); + case Stmt::ParenExprClass: + return getPrimaryDecl(cast<ParenExpr>(e)->getSubExpr()); + case Stmt::ImplicitCastExprClass: + // &X[4] when X is an array, has an implicit cast from array to pointer. + return getPrimaryDecl(cast<ImplicitCastExpr>(e)->getSubExpr()); + default: + return 0; + } +} + +/// CheckAddressOfOperand - The operand of & must be either a function +/// designator or an lvalue designating an object. If it is an lvalue, the +/// object cannot be declared with storage class register or be a bit field. +/// Note: The usual conversions are *not* applied to the operand of the & +/// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. +QualType Sema::CheckAddressOfOperand(Expr *op, SourceLocation OpLoc) { + if (getLangOptions().C99) { + // Implement C99-only parts of addressof rules. + if (UnaryOperator* uOp = dyn_cast<UnaryOperator>(op)) { + if (uOp->getOpcode() == UnaryOperator::Deref) + // Per C99 6.5.3.2, the address of a deref always returns a valid result + // (assuming the deref expression is valid). + return uOp->getSubExpr()->getType(); + } + // Technically, there should be a check for array subscript + // expressions here, but the result of one is always an lvalue anyway. 
+ } + ValueDecl *dcl = getPrimaryDecl(op); + Expr::isLvalueResult lval = op->isLvalue(); + + if (lval != Expr::LV_Valid) { // C99 6.5.3.2p1 + if (!dcl || !isa<FunctionDecl>(dcl)) {// allow function designators + // FIXME: emit more specific diag... + Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof, + op->getSourceRange()); + return QualType(); + } + } else if (MemberExpr *MemExpr = dyn_cast<MemberExpr>(op)) { // C99 6.5.3.2p1 + if (MemExpr->getMemberDecl()->isBitField()) { + Diag(OpLoc, diag::err_typecheck_address_of, + std::string("bit-field"), op->getSourceRange()); + return QualType(); + } + // Check for Apple extension for accessing vector components. + } else if (isa<ArraySubscriptExpr>(op) && + cast<ArraySubscriptExpr>(op)->getBase()->getType()->isVectorType()) { + Diag(OpLoc, diag::err_typecheck_address_of, + std::string("vector"), op->getSourceRange()); + return QualType(); + } else if (dcl) { // C99 6.5.3.2p1 + // We have an lvalue with a decl. Make sure the decl is not declared + // with the register storage-class specifier. + if (const VarDecl *vd = dyn_cast<VarDecl>(dcl)) { + if (vd->getStorageClass() == VarDecl::Register) { + Diag(OpLoc, diag::err_typecheck_address_of, + std::string("register variable"), op->getSourceRange()); + return QualType(); + } + } else + assert(0 && "Unknown/unexpected decl type"); + } + // If the operand has type "type", the result has type "pointer to type". + return Context.getPointerType(op->getType()); +} + +QualType Sema::CheckIndirectionOperand(Expr *op, SourceLocation OpLoc) { + UsualUnaryConversions(op); + QualType qType = op->getType(); + + if (const PointerType *PT = qType->getAsPointerType()) { + // Note that per both C89 and C99, this is always legal, even + // if ptype is an incomplete type or void. + // It would be possible to warn about dereferencing a + // void pointer, but it's completely well-defined, + // and such a warning is unlikely to catch any mistakes. 
+ return PT->getPointeeType(); + } + Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer, + qType.getAsString(), op->getSourceRange()); + return QualType(); +} + +static inline BinaryOperator::Opcode ConvertTokenKindToBinaryOpcode( + tok::TokenKind Kind) { + BinaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown binop!"); + case tok::star: Opc = BinaryOperator::Mul; break; + case tok::slash: Opc = BinaryOperator::Div; break; + case tok::percent: Opc = BinaryOperator::Rem; break; + case tok::plus: Opc = BinaryOperator::Add; break; + case tok::minus: Opc = BinaryOperator::Sub; break; + case tok::lessless: Opc = BinaryOperator::Shl; break; + case tok::greatergreater: Opc = BinaryOperator::Shr; break; + case tok::lessequal: Opc = BinaryOperator::LE; break; + case tok::less: Opc = BinaryOperator::LT; break; + case tok::greaterequal: Opc = BinaryOperator::GE; break; + case tok::greater: Opc = BinaryOperator::GT; break; + case tok::exclaimequal: Opc = BinaryOperator::NE; break; + case tok::equalequal: Opc = BinaryOperator::EQ; break; + case tok::amp: Opc = BinaryOperator::And; break; + case tok::caret: Opc = BinaryOperator::Xor; break; + case tok::pipe: Opc = BinaryOperator::Or; break; + case tok::ampamp: Opc = BinaryOperator::LAnd; break; + case tok::pipepipe: Opc = BinaryOperator::LOr; break; + case tok::equal: Opc = BinaryOperator::Assign; break; + case tok::starequal: Opc = BinaryOperator::MulAssign; break; + case tok::slashequal: Opc = BinaryOperator::DivAssign; break; + case tok::percentequal: Opc = BinaryOperator::RemAssign; break; + case tok::plusequal: Opc = BinaryOperator::AddAssign; break; + case tok::minusequal: Opc = BinaryOperator::SubAssign; break; + case tok::lesslessequal: Opc = BinaryOperator::ShlAssign; break; + case tok::greatergreaterequal: Opc = BinaryOperator::ShrAssign; break; + case tok::ampequal: Opc = BinaryOperator::AndAssign; break; + case tok::caretequal: Opc = BinaryOperator::XorAssign; break; + case 
tok::pipeequal: Opc = BinaryOperator::OrAssign; break; + case tok::comma: Opc = BinaryOperator::Comma; break; + } + return Opc; +} + +static inline UnaryOperator::Opcode ConvertTokenKindToUnaryOpcode( + tok::TokenKind Kind) { + UnaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown unary op!"); + case tok::plusplus: Opc = UnaryOperator::PreInc; break; + case tok::minusminus: Opc = UnaryOperator::PreDec; break; + case tok::amp: Opc = UnaryOperator::AddrOf; break; + case tok::star: Opc = UnaryOperator::Deref; break; + case tok::plus: Opc = UnaryOperator::Plus; break; + case tok::minus: Opc = UnaryOperator::Minus; break; + case tok::tilde: Opc = UnaryOperator::Not; break; + case tok::exclaim: Opc = UnaryOperator::LNot; break; + case tok::kw_sizeof: Opc = UnaryOperator::SizeOf; break; + case tok::kw___alignof: Opc = UnaryOperator::AlignOf; break; + case tok::kw___real: Opc = UnaryOperator::Real; break; + case tok::kw___imag: Opc = UnaryOperator::Imag; break; + case tok::kw___extension__: Opc = UnaryOperator::Extension; break; + } + return Opc; +} + +// Binary Operators. 'Tok' is the token for the operator. +Action::ExprResult Sema::ActOnBinOp(SourceLocation TokLoc, tok::TokenKind Kind, + ExprTy *LHS, ExprTy *RHS) { + BinaryOperator::Opcode Opc = ConvertTokenKindToBinaryOpcode(Kind); + Expr *lhs = (Expr *)LHS, *rhs = (Expr*)RHS; + + assert((lhs != 0) && "ActOnBinOp(): missing left expression"); + assert((rhs != 0) && "ActOnBinOp(): missing right expression"); + + QualType ResultTy; // Result type of the binary operator. + QualType CompTy; // Computation type for compound assignments (e.g. 
'+=') + + switch (Opc) { + default: + assert(0 && "Unknown binary expr!"); + case BinaryOperator::Assign: + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, QualType()); + break; + case BinaryOperator::Mul: + case BinaryOperator::Div: + ResultTy = CheckMultiplyDivideOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Rem: + ResultTy = CheckRemainderOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Add: + ResultTy = CheckAdditionOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Sub: + ResultTy = CheckSubtractionOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Shl: + case BinaryOperator::Shr: + ResultTy = CheckShiftOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::LE: + case BinaryOperator::LT: + case BinaryOperator::GE: + case BinaryOperator::GT: + ResultTy = CheckCompareOperands(lhs, rhs, TokLoc, true); + break; + case BinaryOperator::EQ: + case BinaryOperator::NE: + ResultTy = CheckCompareOperands(lhs, rhs, TokLoc, false); + break; + case BinaryOperator::And: + case BinaryOperator::Xor: + case BinaryOperator::Or: + ResultTy = CheckBitwiseOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::LAnd: + case BinaryOperator::LOr: + ResultTy = CheckLogicalOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::MulAssign: + case BinaryOperator::DivAssign: + CompTy = CheckMultiplyDivideOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::RemAssign: + CompTy = CheckRemainderOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::AddAssign: + CompTy = CheckAdditionOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::SubAssign: + CompTy = CheckSubtractionOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + 
ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::ShlAssign: + case BinaryOperator::ShrAssign: + CompTy = CheckShiftOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::AndAssign: + case BinaryOperator::XorAssign: + case BinaryOperator::OrAssign: + CompTy = CheckBitwiseOperands(lhs, rhs, TokLoc, true); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::Comma: + ResultTy = CheckCommaOperands(lhs, rhs, TokLoc); + break; + } + if (ResultTy.isNull()) + return true; + if (CompTy.isNull()) + return new BinaryOperator(lhs, rhs, Opc, ResultTy, TokLoc); + else + return new CompoundAssignOperator(lhs, rhs, Opc, ResultTy, CompTy, TokLoc); +} + +// Unary Operators. 'Tok' is the token for the operator. +Action::ExprResult Sema::ActOnUnaryOp(SourceLocation OpLoc, tok::TokenKind Op, + ExprTy *input) { + Expr *Input = (Expr*)input; + UnaryOperator::Opcode Opc = ConvertTokenKindToUnaryOpcode(Op); + QualType resultType; + switch (Opc) { + default: + assert(0 && "Unimplemented unary expr!"); + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + resultType = CheckIncrementDecrementOperand(Input, OpLoc); + break; + case UnaryOperator::AddrOf: + resultType = CheckAddressOfOperand(Input, OpLoc); + break; + case UnaryOperator::Deref: + DefaultFunctionArrayConversion(Input); + resultType = CheckIndirectionOperand(Input, OpLoc); + break; + case UnaryOperator::Plus: + case UnaryOperator::Minus: + UsualUnaryConversions(Input); + resultType = Input->getType(); + if (!resultType->isArithmeticType()) // C99 6.5.3.3p1 + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + break; + case UnaryOperator::Not: // bitwise complement + UsualUnaryConversions(Input); + resultType = Input->getType(); + // C99 6.5.3.3p1. We allow complex as a GCC extension. 
+ if (!resultType->isIntegerType()) { + if (resultType->isComplexType()) + // C99 does not support '~' for complex conjugation. + Diag(OpLoc, diag::ext_integer_complement_complex, + resultType.getAsString()); + else + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + } + break; + case UnaryOperator::LNot: // logical negation + // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). + DefaultFunctionArrayConversion(Input); + resultType = Input->getType(); + if (!resultType->isScalarType()) // C99 6.5.3.3p1 + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + // LNot always has type int. C99 6.5.3.3p5. + resultType = Context.IntTy; + break; + case UnaryOperator::SizeOf: + resultType = CheckSizeOfAlignOfOperand(Input->getType(), OpLoc, true); + break; + case UnaryOperator::AlignOf: + resultType = CheckSizeOfAlignOfOperand(Input->getType(), OpLoc, false); + break; + case UnaryOperator::Real: + case UnaryOperator::Imag: + resultType = CheckRealImagOperand(Input, OpLoc); + break; + case UnaryOperator::Extension: + resultType = Input->getType(); + break; + } + if (resultType.isNull()) + return true; + return new UnaryOperator(Input, Opc, resultType, OpLoc); +} + +/// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo". +Sema::ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, + SourceLocation LabLoc, + IdentifierInfo *LabelII) { + // Look up the record for this label identifier. + LabelStmt *&LabelDecl = LabelMap[LabelII]; + + // If we haven't seen this label yet, create a forward reference. + if (LabelDecl == 0) + LabelDecl = new LabelStmt(LabLoc, LabelII, 0); + + // Create the AST node. The address of a label always has type 'void*'. 
+ return new AddrLabelExpr(OpLoc, LabLoc, LabelDecl, + Context.getPointerType(Context.VoidTy)); +} + +Sema::ExprResult Sema::ActOnStmtExpr(SourceLocation LPLoc, StmtTy *substmt, + SourceLocation RPLoc) { // "({..})" + Stmt *SubStmt = static_cast<Stmt*>(substmt); + assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!"); + CompoundStmt *Compound = cast<CompoundStmt>(SubStmt); + + // FIXME: there are a variety of strange constraints to enforce here, for + // example, it is not possible to goto into a stmt expression apparently. + // More semantic analysis is needed. + + // FIXME: the last statement in the compount stmt has its value used. We + // should not warn about it being unused. + + // If there are sub stmts in the compound stmt, take the type of the last one + // as the type of the stmtexpr. + QualType Ty = Context.VoidTy; + + if (!Compound->body_empty()) + if (Expr *LastExpr = dyn_cast<Expr>(Compound->body_back())) + Ty = LastExpr->getType(); + + return new StmtExpr(Compound, Ty, LPLoc, RPLoc); +} + +Sema::ExprResult Sema::ActOnBuiltinOffsetOf(SourceLocation BuiltinLoc, + SourceLocation TypeLoc, + TypeTy *argty, + OffsetOfComponent *CompPtr, + unsigned NumComponents, + SourceLocation RPLoc) { + QualType ArgTy = QualType::getFromOpaquePtr(argty); + assert(!ArgTy.isNull() && "Missing type argument!"); + + // We must have at least one component that refers to the type, and the first + // one is known to be a field designator. Verify that the ArgTy represents + // a struct/union/class. + if (!ArgTy->isRecordType()) + return Diag(TypeLoc, diag::err_offsetof_record_type,ArgTy.getAsString()); + + // Otherwise, create a compound literal expression as the base, and + // iteratively process the offsetof designators. + Expr *Res = new CompoundLiteralExpr(SourceLocation(), ArgTy, 0, false); + + // offsetof with non-identifier designators (e.g. "offsetof(x, a.b[c])") are a + // GCC extension, diagnose them. 
+ if (NumComponents != 1) + Diag(BuiltinLoc, diag::ext_offsetof_extended_field_designator, + SourceRange(CompPtr[1].LocStart, CompPtr[NumComponents-1].LocEnd)); + + for (unsigned i = 0; i != NumComponents; ++i) { + const OffsetOfComponent &OC = CompPtr[i]; + if (OC.isBrackets) { + // Offset of an array sub-field. TODO: Should we allow vector elements? + const ArrayType *AT = Res->getType()->getAsArrayType(); + if (!AT) { + delete Res; + return Diag(OC.LocEnd, diag::err_offsetof_array_type, + Res->getType().getAsString()); + } + + // FIXME: C++: Verify that operator[] isn't overloaded. + + // C99 6.5.2.1p1 + Expr *Idx = static_cast<Expr*>(OC.U.E); + if (!Idx->getType()->isIntegerType()) + return Diag(Idx->getLocStart(), diag::err_typecheck_subscript, + Idx->getSourceRange()); + + Res = new ArraySubscriptExpr(Res, Idx, AT->getElementType(), OC.LocEnd); + continue; + } + + const RecordType *RC = Res->getType()->getAsRecordType(); + if (!RC) { + delete Res; + return Diag(OC.LocEnd, diag::err_offsetof_record_type, + Res->getType().getAsString()); + } + + // Get the decl corresponding to this. + RecordDecl *RD = RC->getDecl(); + FieldDecl *MemberDecl = RD->getMember(OC.U.IdentInfo); + if (!MemberDecl) + return Diag(BuiltinLoc, diag::err_typecheck_no_member, + OC.U.IdentInfo->getName(), + SourceRange(OC.LocStart, OC.LocEnd)); + + // FIXME: C++: Verify that MemberDecl isn't a static field. + // FIXME: Verify that MemberDecl isn't a bitfield. + // MemberDecl->getType() doesn't get the right qualifiers, but it doesn't + // matter here. 
+ Res = new MemberExpr(Res, false, MemberDecl, OC.LocEnd, MemberDecl->getType()); + } + + return new UnaryOperator(Res, UnaryOperator::OffsetOf, Context.getSizeType(), + BuiltinLoc); +} + + +Sema::ExprResult Sema::ActOnTypesCompatibleExpr(SourceLocation BuiltinLoc, + TypeTy *arg1, TypeTy *arg2, + SourceLocation RPLoc) { + QualType argT1 = QualType::getFromOpaquePtr(arg1); + QualType argT2 = QualType::getFromOpaquePtr(arg2); + + assert((!argT1.isNull() && !argT2.isNull()) && "Missing type argument(s)"); + + return new TypesCompatibleExpr(Context.IntTy, BuiltinLoc, argT1, argT2,RPLoc); +} + +Sema::ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc, ExprTy *cond, + ExprTy *expr1, ExprTy *expr2, + SourceLocation RPLoc) { + Expr *CondExpr = static_cast<Expr*>(cond); + Expr *LHSExpr = static_cast<Expr*>(expr1); + Expr *RHSExpr = static_cast<Expr*>(expr2); + + assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)"); + + // The conditional expression is required to be a constant expression. + llvm::APSInt condEval(32); + SourceLocation ExpLoc; + if (!CondExpr->isIntegerConstantExpr(condEval, Context, &ExpLoc)) + return Diag(ExpLoc, diag::err_typecheck_choose_expr_requires_constant, + CondExpr->getSourceRange()); + + // If the condition is > zero, then the AST type is the same as the LSHExpr. + QualType resType = condEval.getZExtValue() ? LHSExpr->getType() : + RHSExpr->getType(); + return new ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, RPLoc); +} + +/// ExprsMatchFnType - return true if the Exprs in array Args have +/// QualTypes that match the QualTypes of the arguments of the FnType. +/// The number of arguments has already been validated to match the number of +/// arguments in FnType. 
static bool ExprsMatchFnType(Expr **Args, const FunctionTypeProto *FnType) {
  unsigned NumParams = FnType->getNumArgs();
  // Compare canonical types pairwise; the first mismatch decides.
  for (unsigned i = 0; i != NumParams; ++i)
    if (Args[i]->getType().getCanonicalType() !=
        FnType->getArgType(i).getCanonicalType())
      return false;
  return true;
}

/// ActOnOverloadExpr - Semantic analysis for the overload builtin.  The
/// argument list is laid out as: args[0] = constant parameter count N,
/// args[1..N] = the actual call arguments, args[N+1..] = candidate functions.
/// Exactly one candidate must have a prototype whose parameter types match
/// the call arguments; its result type becomes the type of the OverloadExpr.
Sema::ExprResult Sema::ActOnOverloadExpr(ExprTy **args, unsigned NumArgs,
                                         SourceLocation *CommaLocs,
                                         SourceLocation BuiltinLoc,
                                         SourceLocation RParenLoc) {
  // __builtin_overload requires at least 2 arguments
  if (NumArgs < 2)
    return Diag(RParenLoc, diag::err_typecheck_call_too_few_args,
                SourceRange(BuiltinLoc, RParenLoc));

  // The first argument is required to be a constant expression.  It tells us
  // the number of arguments to pass to each of the functions to be overloaded.
  Expr **Args = reinterpret_cast<Expr**>(args);
  Expr *NParamsExpr = Args[0];
  llvm::APSInt constEval(32);
  SourceLocation ExpLoc;
  if (!NParamsExpr->isIntegerConstantExpr(constEval, Context, &ExpLoc))
    return Diag(ExpLoc, diag::err_overload_expr_requires_non_zero_constant,
                NParamsExpr->getSourceRange());

  // Verify that the number of parameters is > 0
  unsigned NumParams = constEval.getZExtValue();
  if (NumParams == 0)
    return Diag(ExpLoc, diag::err_overload_expr_requires_non_zero_constant,
                NParamsExpr->getSourceRange());
  // Verify that we have at least 1 + NumParams arguments to the builtin.
  if ((NumParams + 1) > NumArgs)
    return Diag(RParenLoc, diag::err_typecheck_call_too_few_args,
                SourceRange(BuiltinLoc, RParenLoc));

  // Figure out the return type, by matching the args to one of the functions
  // listed after the parameters.
  OverloadExpr *OE = 0;
  for (unsigned i = NumParams + 1; i < NumArgs; ++i) {
    // UsualUnaryConversions will convert the function DeclRefExpr into a
    // pointer to function.
    Expr *Fn = UsualUnaryConversions(Args[i]);
    FunctionTypeProto *FnType = 0;
    if (const PointerType *PT = Fn->getType()->getAsPointerType()) {
      QualType PointeeType = PT->getPointeeType().getCanonicalType();
      FnType = dyn_cast<FunctionTypeProto>(PointeeType);
    }

    // The Expr type must be FunctionTypeProto, since FunctionTypeNoProto has
    // no parameters, and the number of parameters must match the value passed
    // to the builtin.
    if (!FnType || (FnType->getNumArgs() != NumParams))
      return Diag(Fn->getExprLoc(), diag::err_overload_incorrect_fntype,
                  Fn->getSourceRange());

    // Scan the parameter list for the FunctionType, checking the QualType of
    // each parameter against the QualTypes of the arguments to the builtin.
    // If they match, remember the candidate; a second match is an error.
    if (ExprsMatchFnType(Args+1, FnType)) {
      // NOTE(review): the OverloadExpr already stored in OE is not freed on
      // this error return -- confirm who owns it.
      if (OE)
        return Diag(Fn->getExprLoc(), diag::err_overload_multiple_match,
                    OE->getFn()->getSourceRange());
      // Remember our match, and continue processing the remaining arguments
      // to catch any errors.
      OE = new OverloadExpr(Args, NumArgs, i, FnType->getResultType(),
                            BuiltinLoc, RParenLoc);
    }
  }
  // Return the newly created OverloadExpr node, if we succeeded in matching
  // exactly one of the candidate functions.
  if (OE)
    return OE;

  // If we didn't find a matching function Expr in the __builtin_overload list
  // then return an error.  The diagnostic lists the argument types,
  // comma-separated.
  std::string typeNames;
  for (unsigned i = 0; i != NumParams; ++i) {
    if (i != 0) typeNames += ", ";
    typeNames += Args[i+1]->getType().getAsString();
  }

  return Diag(BuiltinLoc, diag::err_overload_no_match, typeNames,
              SourceRange(BuiltinLoc, RParenLoc));
}

/// ActOnVAArg - Build a VAArgExpr of type 'type'.  The first operand must be
/// assignment-compatible with the builtin va_list type, otherwise a
/// diagnostic is emitted and its result returned.
Sema::ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc,
                                  ExprTy *expr, TypeTy *type,
                                  SourceLocation RPLoc) {
  Expr *E = static_cast<Expr*>(expr);
  QualType T = QualType::getFromOpaquePtr(type);

  InitBuiltinVaListType();

  if (CheckAssignmentConstraints(Context.getBuiltinVaListType(), E->getType())
      != Compatible)
    return Diag(E->getLocStart(),
                diag::err_first_argument_to_va_arg_not_of_type_va_list,
                E->getType().getAsString(),
                E->getSourceRange());

  // FIXME: Warn if a non-POD type is passed in.

  return new VAArgExpr(BuiltinLoc, E, T, RPLoc);
}

/// DiagnoseAssignmentResult - Emit the diagnostic that corresponds to the
/// assignment-conversion result ConvTy.  Returns false for Compatible (no
/// diagnostic) and for every kind mapped to an ext_ diagnostic; returns true
/// only for Incompatible.
bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
                                    SourceLocation Loc,
                                    QualType DstType, QualType SrcType,
                                    Expr *SrcExpr, const char *Flavor) {
  // Decode the result (notice that AST's are still created for extensions).
  bool isInvalid = false;
  unsigned DiagKind;
  switch (ConvTy) {
  // With asserts compiled out, an unknown kind falls into 'Compatible'.
  default: assert(0 && "Unknown conversion type");
  case Compatible: return false;
  case PointerToInt:
    DiagKind = diag::ext_typecheck_convert_pointer_int;
    break;
  case IntToPointer:
    DiagKind = diag::ext_typecheck_convert_int_pointer;
    break;
  case IncompatiblePointer:
    DiagKind = diag::ext_typecheck_convert_incompatible_pointer;
    break;
  case FunctionVoidPointer:
    DiagKind = diag::ext_typecheck_convert_pointer_void_func;
    break;
  case CompatiblePointerDiscardsQualifiers:
    DiagKind = diag::ext_typecheck_convert_discards_qualifiers;
    break;
  case Incompatible:
    DiagKind = diag::err_typecheck_convert_incompatible;
    isInvalid = true;
    break;
  }

  Diag(Loc, DiagKind, DstType.getAsString(), SrcType.getAsString(), Flavor,
       SrcExpr->getSourceRange());
  return isInvalid;
}

diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp new file mode 100644 index 00000000000..e49a43c4728 --- /dev/null +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -0,0 +1,51 @@
//===--- SemaExprCXX.cpp - Semantic Analysis for Expressions --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for C++ expressions.
//
//===----------------------------------------------------------------------===//

#include "Sema.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ASTContext.h"
using namespace clang;

/// ActOnCXXCasts - Parse {dynamic,static,reinterpret,const}_cast's.
+Action::ExprResult +Sema::ActOnCXXCasts(SourceLocation OpLoc, tok::TokenKind Kind, + SourceLocation LAngleBracketLoc, TypeTy *Ty, + SourceLocation RAngleBracketLoc, + SourceLocation LParenLoc, ExprTy *E, + SourceLocation RParenLoc) { + CXXCastExpr::Opcode Op; + + switch (Kind) { + default: assert(0 && "Unknown C++ cast!"); + case tok::kw_const_cast: Op = CXXCastExpr::ConstCast; break; + case tok::kw_dynamic_cast: Op = CXXCastExpr::DynamicCast; break; + case tok::kw_reinterpret_cast: Op = CXXCastExpr::ReinterpretCast; break; + case tok::kw_static_cast: Op = CXXCastExpr::StaticCast; break; + } + + return new CXXCastExpr(Op, QualType::getFromOpaquePtr(Ty), (Expr*)E, OpLoc); +} + +/// ActOnCXXBoolLiteral - Parse {true,false} literals. +Action::ExprResult +Sema::ActOnCXXBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) { + assert((Kind != tok::kw_true || Kind != tok::kw_false) && + "Unknown C++ Boolean value!"); + return new CXXBoolLiteralExpr(Kind == tok::kw_true, Context.BoolTy, OpLoc); +} + +/// ActOnCXXThrow - Parse throw expressions. +Action::ExprResult +Sema::ActOnCXXThrow(SourceLocation OpLoc, ExprTy *E) { + return new CXXThrowExpr((Expr*)E, Context.VoidTy, OpLoc); +} diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp new file mode 100644 index 00000000000..4cf435c3315 --- /dev/null +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -0,0 +1,297 @@ +//===--- SemaExprObjC.cpp - Semantic Analysis for ObjC Expressions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for Objective-C expressions. 
+// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +using namespace clang; + +Sema::ExprResult Sema::ParseObjCStringLiteral(SourceLocation *AtLocs, + ExprTy **Strings, + unsigned NumStrings) { + SourceLocation AtLoc = AtLocs[0]; + StringLiteral* S = static_cast<StringLiteral *>(Strings[0]); + if (NumStrings > 1) { + // Concatenate objc strings. + StringLiteral* ES = static_cast<StringLiteral *>(Strings[NumStrings-1]); + SourceLocation EndLoc = ES->getSourceRange().getEnd(); + unsigned Length = 0; + for (unsigned i = 0; i < NumStrings; i++) + Length += static_cast<StringLiteral *>(Strings[i])->getByteLength(); + char *strBuf = new char [Length]; + char *p = strBuf; + bool isWide = false; + for (unsigned i = 0; i < NumStrings; i++) { + S = static_cast<StringLiteral *>(Strings[i]); + if (S->isWide()) + isWide = true; + memcpy(p, S->getStrData(), S->getByteLength()); + p += S->getByteLength(); + delete S; + } + S = new StringLiteral(strBuf, Length, + isWide, Context.getPointerType(Context.CharTy), + AtLoc, EndLoc); + } + + if (CheckBuiltinCFStringArgument(S)) + return true; + + if (Context.getObjCConstantStringInterface().isNull()) { + // Initialize the constant string interface lazily. This assumes + // the NSConstantString interface is seen in this translation unit. 
+ IdentifierInfo *NSIdent = &Context.Idents.get("NSConstantString"); + ScopedDecl *IFace = LookupScopedDecl(NSIdent, Decl::IDNS_Ordinary, + SourceLocation(), TUScope); + ObjCInterfaceDecl *strIFace = dyn_cast_or_null<ObjCInterfaceDecl>(IFace); + if (!strIFace) + return Diag(S->getLocStart(), diag::err_undef_interface, + NSIdent->getName()); + Context.setObjCConstantStringInterface(strIFace); + } + QualType t = Context.getObjCConstantStringInterface(); + t = Context.getPointerType(t); + return new ObjCStringLiteral(S, t, AtLoc); +} + +Sema::ExprResult Sema::ParseObjCEncodeExpression(SourceLocation AtLoc, + SourceLocation EncodeLoc, + SourceLocation LParenLoc, + TypeTy *Ty, + SourceLocation RParenLoc) { + QualType EncodedType = QualType::getFromOpaquePtr(Ty); + + QualType t = Context.getPointerType(Context.CharTy); + return new ObjCEncodeExpr(t, EncodedType, AtLoc, RParenLoc); +} + +Sema::ExprResult Sema::ParseObjCSelectorExpression(Selector Sel, + SourceLocation AtLoc, + SourceLocation SelLoc, + SourceLocation LParenLoc, + SourceLocation RParenLoc) { + QualType t = Context.getObjCSelType(); + return new ObjCSelectorExpr(t, Sel, AtLoc, RParenLoc); +} + +Sema::ExprResult Sema::ParseObjCProtocolExpression(IdentifierInfo *ProtocolId, + SourceLocation AtLoc, + SourceLocation ProtoLoc, + SourceLocation LParenLoc, + SourceLocation RParenLoc) { + ObjCProtocolDecl* PDecl = ObjCProtocols[ProtocolId]; + if (!PDecl) { + Diag(ProtoLoc, diag::err_undeclared_protocol, ProtocolId->getName()); + return true; + } + + QualType t = Context.getObjCProtoType(); + if (t.isNull()) + return true; + t = Context.getPointerType(t); + return new ObjCProtocolExpr(t, PDecl, AtLoc, RParenLoc); +} + +bool Sema::CheckMessageArgumentTypes(Expr **Args, unsigned NumArgs, + ObjCMethodDecl *Method) { + bool anyIncompatibleArgs = false; + + for (unsigned i = 0; i < NumArgs; i++) { + Expr *argExpr = Args[i]; + assert(argExpr && "CheckMessageArgumentTypes(): missing expression"); + + QualType lhsType = 
Method->getParamDecl(i)->getType(); + QualType rhsType = argExpr->getType(); + + // If necessary, apply function/array conversion. C99 6.7.5.3p[7,8]. + if (const ArrayType *ary = lhsType->getAsArrayType()) + lhsType = Context.getPointerType(ary->getElementType()); + else if (lhsType->isFunctionType()) + lhsType = Context.getPointerType(lhsType); + + AssignConvertType Result = CheckSingleAssignmentConstraints(lhsType, + argExpr); + if (Args[i] != argExpr) // The expression was converted. + Args[i] = argExpr; // Make sure we store the converted expression. + + anyIncompatibleArgs |= + DiagnoseAssignmentResult(Result, argExpr->getLocStart(), lhsType, rhsType, + argExpr, "sending"); + } + return anyIncompatibleArgs; +} + +// ActOnClassMessage - used for both unary and keyword messages. +// ArgExprs is optional - if it is present, the number of expressions +// is obtained from Sel.getNumArgs(). +Sema::ExprResult Sema::ActOnClassMessage( + Scope *S, + IdentifierInfo *receiverName, Selector Sel, + SourceLocation lbrac, SourceLocation rbrac, ExprTy **Args, unsigned NumArgs) +{ + assert(receiverName && "missing receiver class name"); + + Expr **ArgExprs = reinterpret_cast<Expr **>(Args); + ObjCInterfaceDecl* ClassDecl = 0; + if (!strcmp(receiverName->getName(), "super") && CurMethodDecl) { + ClassDecl = CurMethodDecl->getClassInterface()->getSuperClass(); + if (ClassDecl && CurMethodDecl->isInstance()) { + // Synthesize a cast to the super class. This hack allows us to loosely + // represent super without creating a special expression node. + IdentifierInfo &II = Context.Idents.get("self"); + ExprResult ReceiverExpr = ActOnIdentifierExpr(S, lbrac, II, false); + QualType superTy = Context.getObjCInterfaceType(ClassDecl); + superTy = Context.getPointerType(superTy); + ReceiverExpr = ActOnCastExpr(SourceLocation(), superTy.getAsOpaquePtr(), + SourceLocation(), ReceiverExpr.Val); + // We are really in an instance method, redirect. 
+ return ActOnInstanceMessage(ReceiverExpr.Val, Sel, lbrac, rbrac, + Args, NumArgs); + } + // We are sending a message to 'super' within a class method. Do nothing, + // the receiver will pass through as 'super' (how convenient:-). + } else + ClassDecl = getObjCInterfaceDecl(receiverName); + + // FIXME: can ClassDecl ever be null? + ObjCMethodDecl *Method = ClassDecl->lookupClassMethod(Sel); + QualType returnType; + + // Before we give up, check if the selector is an instance method. + if (!Method) + Method = ClassDecl->lookupInstanceMethod(Sel); + if (!Method) { + Diag(lbrac, diag::warn_method_not_found, std::string("+"), Sel.getName(), + SourceRange(lbrac, rbrac)); + returnType = Context.getObjCIdType(); + } else { + returnType = Method->getResultType(); + if (Sel.getNumArgs()) { + if (CheckMessageArgumentTypes(ArgExprs, Sel.getNumArgs(), Method)) + return true; + } + } + return new ObjCMessageExpr(receiverName, Sel, returnType, Method, + lbrac, rbrac, ArgExprs, NumArgs); +} + +// ActOnInstanceMessage - used for both unary and keyword messages. +// ArgExprs is optional - if it is present, the number of expressions +// is obtained from Sel.getNumArgs(). +Sema::ExprResult Sema::ActOnInstanceMessage( + ExprTy *receiver, Selector Sel, + SourceLocation lbrac, SourceLocation rbrac, ExprTy **Args, unsigned NumArgs) +{ + assert(receiver && "missing receiver expression"); + + Expr **ArgExprs = reinterpret_cast<Expr **>(Args); + Expr *RExpr = static_cast<Expr *>(receiver); + QualType receiverType = RExpr->getType().getCanonicalType(); + QualType returnType; + ObjCMethodDecl *Method = 0; + + // FIXME: This code is not stripping off type qualifiers! Should it? 
+ if (receiverType == Context.getObjCIdType().getCanonicalType() || + receiverType == Context.getObjCClassType().getCanonicalType()) { + Method = InstanceMethodPool[Sel].Method; + if (!Method) + Method = FactoryMethodPool[Sel].Method; + if (!Method) { + Diag(lbrac, diag::warn_method_not_found, std::string("-"), Sel.getName(), + SourceRange(lbrac, rbrac)); + returnType = Context.getObjCIdType(); + } else { + returnType = Method->getResultType(); + if (Sel.getNumArgs()) + if (CheckMessageArgumentTypes(ArgExprs, Sel.getNumArgs(), Method)) + return true; + } + } else { + bool receiverIsQualId = isa<ObjCQualifiedIdType>(receiverType); + // FIXME (snaroff): checking in this code from Patrick. Needs to be + // revisited. how do we get the ClassDecl from the receiver expression? + if (!receiverIsQualId) + while (const PointerType *PTy = receiverType->getAsPointerType()) + receiverType = PTy->getPointeeType(); + + ObjCInterfaceDecl* ClassDecl = 0; + if (ObjCQualifiedInterfaceType *QIT = + dyn_cast<ObjCQualifiedInterfaceType>(receiverType)) { + ClassDecl = QIT->getDecl(); + Method = ClassDecl->lookupInstanceMethod(Sel); + if (!Method) { + // search protocols + for (unsigned i = 0; i < QIT->getNumProtocols(); i++) { + ObjCProtocolDecl *PDecl = QIT->getProtocols(i); + if (PDecl && (Method = PDecl->lookupInstanceMethod(Sel))) + break; + } + } + if (!Method) + Diag(lbrac, diag::warn_method_not_found_in_protocol, + std::string("-"), Sel.getName(), + SourceRange(lbrac, rbrac)); + } + else if (ObjCQualifiedIdType *QIT = + dyn_cast<ObjCQualifiedIdType>(receiverType)) { + // search protocols + for (unsigned i = 0; i < QIT->getNumProtocols(); i++) { + ObjCProtocolDecl *PDecl = QIT->getProtocols(i); + if (PDecl && (Method = PDecl->lookupInstanceMethod(Sel))) + break; + } + if (!Method) + Diag(lbrac, diag::warn_method_not_found_in_protocol, + std::string("-"), Sel.getName(), + SourceRange(lbrac, rbrac)); + } + else { + ObjCInterfaceType *OCIReceiver 
=dyn_cast<ObjCInterfaceType>(receiverType); + if (OCIReceiver == 0) { + Diag(lbrac, diag::error_bad_receiver_type, + RExpr->getType().getAsString()); + return true; + } + ClassDecl = OCIReceiver->getDecl(); + // FIXME: consider using InstanceMethodPool, since it will be faster + // than the following method (which can do *many* linear searches). The + // idea is to add class info to InstanceMethodPool... + Method = ClassDecl->lookupInstanceMethod(Sel); + } + if (!Method) { + // If we have an implementation in scope, check "private" methods. + if (ClassDecl) + if (ObjCImplementationDecl *ImpDecl = + ObjCImplementations[ClassDecl->getIdentifier()]) + Method = ImpDecl->getInstanceMethod(Sel); + // If we still haven't found a method, look in the global pool. This + // behavior isn't very desirable, however we need it for GCC + // compatibility. + if (!Method) + Method = InstanceMethodPool[Sel].Method; + } + if (!Method) { + Diag(lbrac, diag::warn_method_not_found, std::string("-"), Sel.getName(), + SourceRange(lbrac, rbrac)); + returnType = Context.getObjCIdType(); + } else { + returnType = Method->getResultType(); + if (Sel.getNumArgs()) + if (CheckMessageArgumentTypes(ArgExprs, Sel.getNumArgs(), Method)) + return true; + } + } + return new ObjCMessageExpr(RExpr, Sel, returnType, Method, lbrac, rbrac, + ArgExprs, NumArgs); +} diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp new file mode 100644 index 00000000000..14b2fdf7d0e --- /dev/null +++ b/clang/lib/Sema/SemaStmt.cpp @@ -0,0 +1,821 @@ +//===--- SemaStmt.cpp - Semantic Analysis for Statements ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for statements. 
//
//===----------------------------------------------------------------------===//

#include "Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
#include "clang/Parse/Scope.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/SmallString.h"
using namespace clang;

/// ActOnExprStmt - An expression used as a statement: the (non-null)
/// expression node is returned unchanged.
Sema::StmtResult Sema::ActOnExprStmt(ExprTy *expr) {
  Expr *E = static_cast<Expr*>(expr);
  assert(E && "ActOnExprStmt(): missing expression");
  return E;
}


/// ActOnNullStmt - Build a NullStmt located at the given semicolon.
Sema::StmtResult Sema::ActOnNullStmt(SourceLocation SemiLoc) {
  return new NullStmt(SemiLoc);
}

/// ActOnDeclStmt - Wrap a declaration in a DeclStmt.  A null decl yields an
/// error result.
Sema::StmtResult Sema::ActOnDeclStmt(DeclTy *decl, SourceLocation StartLoc,
                                     SourceLocation EndLoc) {
  if (decl == 0)
    return true;

  ScopedDecl *SD = cast<ScopedDecl>(static_cast<Decl *>(decl));
  return new DeclStmt(SD, StartLoc, EndLoc);
}

/// ActOnCompoundStmt - Build a CompoundStmt from NumElts statements.  When
/// neither C99 nor C++ is enabled, a declaration following a statement gets
/// an extension diagnostic.  Side-effect-free, non-void expression statements
/// are warned about as unused, except the last one of a statement expression.
Action::StmtResult
Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,
                        StmtTy **elts, unsigned NumElts, bool isStmtExpr) {
  Stmt **Elts = reinterpret_cast<Stmt**>(elts);
  // If we're in C89 mode, check that we don't have any decls after stmts.  If
  // so, emit an extension diagnostic.
  if (!getLangOptions().C99 && !getLangOptions().CPlusPlus) {
    // Note that __extension__ can be around a decl.
    unsigned i = 0;
    // Skip over all declarations.
    for (; i != NumElts && isa<DeclStmt>(Elts[i]); ++i)
      /*empty*/;

    // We found the end of the list or a statement.  Scan for another declstmt.
    for (; i != NumElts && !isa<DeclStmt>(Elts[i]); ++i)
      /*empty*/;

    // Only the first declaration after a statement is reported.
    if (i != NumElts) {
      ScopedDecl *D = cast<DeclStmt>(Elts[i])->getDecl();
      Diag(D->getLocation(), diag::ext_mixed_decls_code);
    }
  }
  // Warn about unused expressions in statements.
  for (unsigned i = 0; i != NumElts; ++i) {
    Expr *E = dyn_cast<Expr>(Elts[i]);
    if (!E) continue;

    // Warn about expressions with unused results.
    if (E->hasLocalSideEffect() || E->getType()->isVoidType())
      continue;

    // The last expr in a stmt expr really is used.
    if (isStmtExpr && i == NumElts-1)
      continue;

    /// DiagnoseDeadExpr - This expression is side-effect free and evaluated in
    /// a context where the result is unused.  Emit a diagnostic to warn about
    /// this.
    if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(E))
      Diag(BO->getOperatorLoc(), diag::warn_unused_expr,
           BO->getLHS()->getSourceRange(), BO->getRHS()->getSourceRange());
    else if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E))
      Diag(UO->getOperatorLoc(), diag::warn_unused_expr,
           UO->getSubExpr()->getSourceRange());
    else
      Diag(E->getExprLoc(), diag::warn_unused_expr, E->getSourceRange());
  }

  return new CompoundStmt(Elts, NumElts, L, R);
}

/// ActOnCaseStmt - Build a CaseStmt and register it with the innermost
/// switch on SwitchStack.  On an invalid label value, or when there is no
/// enclosing switch, the error is diagnosed and the sub-statement is returned
/// in place of a CaseStmt.  A non-constant range end (rhsval) is diagnosed
/// and then dropped.
Action::StmtResult
Sema::ActOnCaseStmt(SourceLocation CaseLoc, ExprTy *lhsval,
                    SourceLocation DotDotDotLoc, ExprTy *rhsval,
                    SourceLocation ColonLoc, StmtTy *subStmt) {
  Stmt *SubStmt = static_cast<Stmt*>(subStmt);
  Expr *LHSVal = ((Expr *)lhsval), *RHSVal = ((Expr *)rhsval);
  assert((LHSVal != 0) && "missing expression in case statement");

  SourceLocation ExpLoc;
  // C99 6.8.4.2p3: The expression shall be an integer constant.
  if (!LHSVal->isIntegerConstantExpr(Context, &ExpLoc)) {
    Diag(ExpLoc, diag::err_case_label_not_integer_constant_expr,
         LHSVal->getSourceRange());
    return SubStmt;
  }

  // GCC extension: The expression shall be an integer constant.
  if (RHSVal && !RHSVal->isIntegerConstantExpr(Context, &ExpLoc)) {
    Diag(ExpLoc, diag::err_case_label_not_integer_constant_expr,
         RHSVal->getSourceRange());
    RHSVal = 0; // Recover by just forgetting about it.
  }

  if (SwitchStack.empty()) {
    Diag(CaseLoc, diag::err_case_not_in_switch);
    return SubStmt;
  }

  CaseStmt *CS = new CaseStmt(LHSVal, RHSVal, SubStmt, CaseLoc);
  SwitchStack.back()->addSwitchCase(CS);
  return CS;
}

/// ActOnDefaultStmt - Build a DefaultStmt and register it with the innermost
/// switch.  Outside of any switch the error is diagnosed and the
/// sub-statement is returned instead.
Action::StmtResult
Sema::ActOnDefaultStmt(SourceLocation DefaultLoc, SourceLocation ColonLoc,
                       StmtTy *subStmt, Scope *CurScope) {
  Stmt *SubStmt = static_cast<Stmt*>(subStmt);

  if (SwitchStack.empty()) {
    Diag(DefaultLoc, diag::err_default_not_in_switch);
    return SubStmt;
  }

  DefaultStmt *DS = new DefaultStmt(DefaultLoc, SubStmt);
  SwitchStack.back()->addSwitchCase(DS);

  return DS;
}

/// ActOnLabelStmt - Define the label II.  A label not seen before gets a new
/// LabelStmt; a forward-referenced one (no sub-statement yet) is completed in
/// place; a redefinition is diagnosed and the sub-statement is returned.
Action::StmtResult
Sema::ActOnLabelStmt(SourceLocation IdentLoc, IdentifierInfo *II,
                     SourceLocation ColonLoc, StmtTy *subStmt) {
  Stmt *SubStmt = static_cast<Stmt*>(subStmt);
  // Look up the record for this label identifier.
  LabelStmt *&LabelDecl = LabelMap[II];

  // If not forward referenced or defined already, just create a new LabelStmt.
  if (LabelDecl == 0)
    return LabelDecl = new LabelStmt(IdentLoc, II, SubStmt);

  assert(LabelDecl->getID() == II && "Label mismatch!");

  // Otherwise, this label was either forward reference or multiply defined.  If
  // multiply defined, reject it now.
  if (LabelDecl->getSubStmt()) {
    Diag(IdentLoc, diag::err_redefinition_of_label, LabelDecl->getName());
    Diag(LabelDecl->getIdentLoc(), diag::err_previous_definition);
    return SubStmt;
  }

  // Otherwise, this label was forward declared, and we just found its real
  // definition.  Fill in the forward definition and return it.
  LabelDecl->setIdentLoc(IdentLoc);
  LabelDecl->setSubStmt(SubStmt);
  return LabelDecl;
}

/// ActOnIfStmt - Build an IfStmt.  The condition, after function/array
/// conversion, must have scalar type.  A null-statement 'then' body with no
/// 'else' is warned about.
Action::StmtResult
Sema::ActOnIfStmt(SourceLocation IfLoc, ExprTy *CondVal,
                  StmtTy *ThenVal, SourceLocation ElseLoc,
                  StmtTy *ElseVal) {
  Expr *condExpr = (Expr *)CondVal;
  Stmt *thenStmt = (Stmt *)ThenVal;

  assert(condExpr && "ActOnIfStmt(): missing expression");

  DefaultFunctionArrayConversion(condExpr);
  QualType condType = condExpr->getType();

  if (!condType->isScalarType()) // C99 6.8.4.1p1
    return Diag(IfLoc, diag::err_typecheck_statement_requires_scalar,
                condType.getAsString(), condExpr->getSourceRange());

  // Warn if the if block has a null body without an else value.
  // this helps prevent bugs due to typos, such as
  // if (condition);
  //   do_stuff();
  if (!ElseVal) {
    if (NullStmt* stmt = dyn_cast<NullStmt>(thenStmt))
      Diag(stmt->getSemiLoc(), diag::warn_empty_if_body);
  }

  return new IfStmt(IfLoc, condExpr, thenStmt, (Stmt*)ElseVal);
}

/// ActOnStartOfSwitchStmt - Create the SwitchStmt for a (promoted) condition
/// and push it on SwitchStack so that case/default statements can attach to
/// it.
Action::StmtResult
Sema::ActOnStartOfSwitchStmt(ExprTy *cond) {
  Expr *Cond = static_cast<Expr*>(cond);

  // C99 6.8.4.2p5 - Integer promotions are performed on the controlling expr.
  UsualUnaryConversions(Cond);

  SwitchStmt *SS = new SwitchStmt(Cond);
  SwitchStack.push_back(SS);
  return SS;
}

/// ConvertIntegerToTypeWarnOnOverflow - Convert the specified APInt to have
/// the specified width and sign.  If an overflow occurs, detect it and emit
/// the specified diagnostic.
void Sema::ConvertIntegerToTypeWarnOnOverflow(llvm::APSInt &Val,
                                              unsigned NewWidth, bool NewSign,
                                              SourceLocation Loc,
                                              unsigned DiagID) {
  // Perform a conversion to the promoted condition type if needed.
  if (NewWidth > Val.getBitWidth()) {
    // If this is an extension, just do it.
    llvm::APSInt OldVal(Val);
    Val.extend(NewWidth);

    // If the input was signed and negative and the output is unsigned,
    // warn.
    if (!NewSign && OldVal.isSigned() && OldVal.isNegative())
      Diag(Loc, DiagID, OldVal.toString(), Val.toString());

    Val.setIsSigned(NewSign);
  } else if (NewWidth < Val.getBitWidth()) {
    // If this is a truncation, check for overflow.  Truncate a copy, widen it
    // back to the original width and sign, and compare with the original.
    llvm::APSInt ConvVal(Val);
    ConvVal.trunc(NewWidth);
    ConvVal.setIsSigned(NewSign);
    ConvVal.extend(Val.getBitWidth());
    ConvVal.setIsSigned(Val.isSigned());
    if (ConvVal != Val)
      Diag(Loc, DiagID, Val.toString(), ConvVal.toString());

    // Regardless of whether a diagnostic was emitted, really do the
    // truncation.
    Val.trunc(NewWidth);
    Val.setIsSigned(NewSign);
  } else if (NewSign != Val.isSigned()) {
    // Convert the sign to match the sign of the condition.  This can cause
    // overflow as well: unsigned(INTMIN)
    llvm::APSInt OldVal(Val);
    Val.setIsSigned(NewSign);

    if (Val.isNegative()) // Sign bit changes meaning.
      Diag(Loc, DiagID, OldVal.toString(), Val.toString());
  }
}

namespace {
  /// CaseCompareFunctor - Orders (value, CaseStmt*) pairs by value only; the
  /// mixed overloads compare such a pair directly against a bare APSInt.
  struct CaseCompareFunctor {
    bool operator()(const std::pair<llvm::APSInt, CaseStmt*> &LHS,
                    const llvm::APSInt &RHS) {
      return LHS.first < RHS;
    }
    bool operator()(const std::pair<llvm::APSInt, CaseStmt*> &LHS,
                    const std::pair<llvm::APSInt, CaseStmt*> &RHS) {
      return LHS.first < RHS.first;
    }
    bool operator()(const llvm::APSInt &LHS,
                    const std::pair<llvm::APSInt, CaseStmt*> &RHS) {
      return LHS < RHS.first;
    }
  };
}

/// CmpCaseVals - Comparison predicate for sorting case values.
+/// +static bool CmpCaseVals(const std::pair<llvm::APSInt, CaseStmt*>& lhs, + const std::pair<llvm::APSInt, CaseStmt*>& rhs) { + if (lhs.first < rhs.first) + return true; + + if (lhs.first == rhs.first && + lhs.second->getCaseLoc().getRawEncoding() + < rhs.second->getCaseLoc().getRawEncoding()) + return true; + return false; +} + +Action::StmtResult +Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, StmtTy *Switch, + ExprTy *Body) { + Stmt *BodyStmt = (Stmt*)Body; + + SwitchStmt *SS = SwitchStack.back(); + assert(SS == (SwitchStmt*)Switch && "switch stack missing push/pop!"); + + SS->setBody(BodyStmt, SwitchLoc); + SwitchStack.pop_back(); + + Expr *CondExpr = SS->getCond(); + QualType CondType = CondExpr->getType(); + + if (!CondType->isIntegerType()) { // C99 6.8.4.2p1 + Diag(SwitchLoc, diag::err_typecheck_statement_requires_integer, + CondType.getAsString(), CondExpr->getSourceRange()); + return true; + } + + // Get the bitwidth of the switched-on value before promotions. We must + // convert the integer case values to this width before comparison. + unsigned CondWidth = static_cast<unsigned>(Context.getTypeSize(CondType)); + bool CondIsSigned = CondType->isSignedIntegerType(); + + // Accumulate all of the case values in a vector so that we can sort them + // and detect duplicates. This vector contains the APInt for the case after + // it has been converted to the condition type. + typedef llvm::SmallVector<std::pair<llvm::APSInt, CaseStmt*>, 64> CaseValsTy; + CaseValsTy CaseVals; + + // Keep track of any GNU case ranges we see. The APSInt is the low value. 
+ std::vector<std::pair<llvm::APSInt, CaseStmt*> > CaseRanges; + + DefaultStmt *TheDefaultStmt = 0; + + bool CaseListIsErroneous = false; + + for (SwitchCase *SC = SS->getSwitchCaseList(); SC; + SC = SC->getNextSwitchCase()) { + + if (DefaultStmt *DS = dyn_cast<DefaultStmt>(SC)) { + if (TheDefaultStmt) { + Diag(DS->getDefaultLoc(), diag::err_multiple_default_labels_defined); + Diag(TheDefaultStmt->getDefaultLoc(), diag::err_first_label); + + // FIXME: Remove the default statement from the switch block so that + // we'll return a valid AST. This requires recursing down the + // AST and finding it, not something we are set up to do right now. For + // now, just lop the entire switch stmt out of the AST. + CaseListIsErroneous = true; + } + TheDefaultStmt = DS; + + } else { + CaseStmt *CS = cast<CaseStmt>(SC); + + // We already verified that the expression has a i-c-e value (C99 + // 6.8.4.2p3) - get that value now. + llvm::APSInt LoVal(32); + Expr *Lo = CS->getLHS(); + Lo->isIntegerConstantExpr(LoVal, Context); + + // Convert the value to the same width/sign as the condition. + ConvertIntegerToTypeWarnOnOverflow(LoVal, CondWidth, CondIsSigned, + CS->getLHS()->getLocStart(), + diag::warn_case_value_overflow); + + // If the LHS is not the same type as the condition, insert an implicit + // cast. + ImpCastExprToType(Lo, CondType); + CS->setLHS(Lo); + + // If this is a case range, remember it in CaseRanges, otherwise CaseVals. + if (CS->getRHS()) + CaseRanges.push_back(std::make_pair(LoVal, CS)); + else + CaseVals.push_back(std::make_pair(LoVal, CS)); + } + } + + // Sort all the scalar case values so we can easily detect duplicates. + std::stable_sort(CaseVals.begin(), CaseVals.end(), CmpCaseVals); + + if (!CaseVals.empty()) { + for (unsigned i = 0, e = CaseVals.size()-1; i != e; ++i) { + if (CaseVals[i].first == CaseVals[i+1].first) { + // If we have a duplicate, report it. 
+ Diag(CaseVals[i+1].second->getLHS()->getLocStart(), + diag::err_duplicate_case, CaseVals[i].first.toString()); + Diag(CaseVals[i].second->getLHS()->getLocStart(), + diag::err_duplicate_case_prev); + // FIXME: We really want to remove the bogus case stmt from the substmt, + // but we have no way to do this right now. + CaseListIsErroneous = true; + } + } + } + + // Detect duplicate case ranges, which usually don't exist at all in the first + // place. + if (!CaseRanges.empty()) { + // Sort all the case ranges by their low value so we can easily detect + // overlaps between ranges. + std::stable_sort(CaseRanges.begin(), CaseRanges.end()); + + // Scan the ranges, computing the high values and removing empty ranges. + // HiVals only receives an entry for ranges that are kept, so HiVals[i] + // lines up with CaseRanges[i] once this loop is done. + std::vector<llvm::APSInt> HiVals; + for (unsigned i = 0, e = CaseRanges.size(); i != e; ++i) { + CaseStmt *CR = CaseRanges[i].second; + llvm::APSInt HiVal(32); + Expr *Hi = CR->getRHS(); + Hi->isIntegerConstantExpr(HiVal, Context); + + // Convert the value to the same width/sign as the condition. + ConvertIntegerToTypeWarnOnOverflow(HiVal, CondWidth, CondIsSigned, + CR->getRHS()->getLocStart(), + diag::warn_case_value_overflow); + + // If the RHS is not the same type as the condition, insert an implicit + // cast. + ImpCastExprToType(Hi, CondType); + CR->setRHS(Hi); + + // If the low value is bigger than the high value, the case is empty. + if (CaseRanges[i].first > HiVal) { + Diag(CR->getLHS()->getLocStart(), diag::warn_case_empty_range, + SourceRange(CR->getLHS()->getLocStart(), + CR->getRHS()->getLocEnd())); + // Erasing shifts the following ranges down by one, so step both the + // index and the bound back; the loop's ++i then revisits slot i. + CaseRanges.erase(CaseRanges.begin()+i); + --i, --e; + continue; + } + HiVals.push_back(HiVal); + } + + // Rescan the ranges, looking for overlap with singleton values and other + // ranges. Since the range list is sorted, we only need to compare case + // ranges with their neighbors.
+ for (unsigned i = 0, e = CaseRanges.size(); i != e; ++i) { + llvm::APSInt &CRLo = CaseRanges[i].first; + llvm::APSInt &CRHi = HiVals[i]; + CaseStmt *CR = CaseRanges[i].second; + + // Check to see whether the case range overlaps with any singleton cases. + CaseStmt *OverlapStmt = 0; + llvm::APSInt OverlapVal(32); + + // Find the smallest value >= the lower bound. If I is in the case range, + // then we have overlap. + CaseValsTy::iterator I = std::lower_bound(CaseVals.begin(), + CaseVals.end(), CRLo, + CaseCompareFunctor()); + if (I != CaseVals.end() && I->first < CRHi) { + OverlapVal = I->first; // Found overlap with scalar. + OverlapStmt = I->second; + } + + // Find the smallest value bigger than the upper bound; the element just + // before it, if there is one, is the largest value <= the upper bound. + I = std::upper_bound(I, CaseVals.end(), CRHi, CaseCompareFunctor()); + if (I != CaseVals.begin() && (I-1)->first >= CRLo) { + OverlapVal = (I-1)->first; // Found overlap with scalar. + OverlapStmt = (I-1)->second; + } + + // Check to see if this case range overlaps with the preceding case range + // (index i-1 in the list sorted by low value). + if (i && CRLo <= HiVals[i-1]) { + OverlapVal = HiVals[i-1]; // Found overlap with range. + OverlapStmt = CaseRanges[i-1].second; + } + + if (OverlapStmt) { + // If we have a duplicate, report it. + Diag(CR->getLHS()->getLocStart(), + diag::err_duplicate_case, OverlapVal.toString()); + Diag(OverlapStmt->getLHS()->getLocStart(), + diag::err_duplicate_case_prev); + // FIXME: We really want to remove the bogus case stmt from the substmt, + // but we have no way to do this right now. + CaseListIsErroneous = true; + } + } + } + + // FIXME: If the case list was broken in some way, we don't have a good system + // to patch it up. Instead, just return the whole substmt as broken.
+ if (CaseListIsErroneous) + return true; + + return SS; +} + +Action::StmtResult +Sema::ActOnWhileStmt(SourceLocation WhileLoc, ExprTy *Cond, StmtTy *Body) { + Expr *condExpr = (Expr *)Cond; + assert(condExpr && "ActOnWhileStmt(): missing expression"); + + DefaultFunctionArrayConversion(condExpr); + QualType condType = condExpr->getType(); + + if (!condType->isScalarType()) // C99 6.8.5p2 + return Diag(WhileLoc, diag::err_typecheck_statement_requires_scalar, + condType.getAsString(), condExpr->getSourceRange()); + + return new WhileStmt(condExpr, (Stmt*)Body, WhileLoc); +} + +Action::StmtResult +Sema::ActOnDoStmt(SourceLocation DoLoc, StmtTy *Body, + SourceLocation WhileLoc, ExprTy *Cond) { + Expr *condExpr = (Expr *)Cond; + assert(condExpr && "ActOnDoStmt(): missing expression"); + + DefaultFunctionArrayConversion(condExpr); + QualType condType = condExpr->getType(); + + if (!condType->isScalarType()) // C99 6.8.5p2 + return Diag(DoLoc, diag::err_typecheck_statement_requires_scalar, + condType.getAsString(), condExpr->getSourceRange()); + + return new DoStmt((Stmt*)Body, condExpr, DoLoc); +} + +Action::StmtResult +Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, + StmtTy *first, ExprTy *second, ExprTy *third, + SourceLocation RParenLoc, StmtTy *body) { + Stmt *First = static_cast<Stmt*>(first); + Expr *Second = static_cast<Expr*>(second); + Expr *Third = static_cast<Expr*>(third); + Stmt *Body = static_cast<Stmt*>(body); + + if (DeclStmt *DS = dyn_cast_or_null<DeclStmt>(First)) { + // C99 6.8.5p3: The declaration part of a 'for' statement shall only declare + // identifiers for objects having storage class 'auto' or 'register'. + for (ScopedDecl *D = DS->getDecl(); D; D = D->getNextDeclarator()) { + BlockVarDecl *BVD = dyn_cast<BlockVarDecl>(D); + if (BVD && !BVD->hasLocalStorage()) + BVD = 0; + if (BVD == 0) + Diag(dyn_cast<ScopedDecl>(D)->getLocation(), + diag::err_non_variable_decl_in_for); + // FIXME: mark decl erroneous! 
+ } + } + if (Second) { + DefaultFunctionArrayConversion(Second); + QualType SecondType = Second->getType(); + + if (!SecondType->isScalarType()) // C99 6.8.5p2 + return Diag(ForLoc, diag::err_typecheck_statement_requires_scalar, + SecondType.getAsString(), Second->getSourceRange()); + } + return new ForStmt(First, Second, Third, Body, ForLoc); +} + +Action::StmtResult +Sema::ActOnObjCForCollectionStmt(SourceLocation ForLoc, + SourceLocation LParenLoc, + StmtTy *first, ExprTy *second, + SourceLocation RParenLoc, StmtTy *body) { + Stmt *First = static_cast<Stmt*>(first); + Expr *Second = static_cast<Expr*>(second); + Stmt *Body = static_cast<Stmt*>(body); + if (First) { + QualType FirstType; + if (DeclStmt *DS = dyn_cast<DeclStmt>(First)) { + FirstType = cast<ValueDecl>(DS->getDecl())->getType(); + // C99 6.8.5p3: The declaration part of a 'for' statement shall only declare + // identifiers for objects having storage class 'auto' or 'register'. + ScopedDecl *D = DS->getDecl(); + BlockVarDecl *BVD = cast<BlockVarDecl>(D); + if (!BVD->hasLocalStorage()) + return Diag(BVD->getLocation(), diag::err_non_variable_decl_in_for); + if (D->getNextDeclarator()) + return Diag(D->getLocation(), diag::err_toomany_element_decls); + } + else + FirstType = static_cast<Expr*>(first)->getType(); + if (!isObjCObjectPointerType(FirstType)) + Diag(ForLoc, diag::err_selector_element_type, + FirstType.getAsString(), First->getSourceRange()); + } + if (Second) { + DefaultFunctionArrayConversion(Second); + QualType SecondType = Second->getType(); + if (!isObjCObjectPointerType(SecondType)) + Diag(ForLoc, diag::err_collection_expr_type, + SecondType.getAsString(), Second->getSourceRange()); + } + return new ObjCForCollectionStmt(First, Second, Body, ForLoc, RParenLoc); +} + +Action::StmtResult +Sema::ActOnGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc, + IdentifierInfo *LabelII) { + // Look up the record for this label identifier. 
+ LabelStmt *&LabelDecl = LabelMap[LabelII]; + + // If we haven't seen this label yet, create a forward reference. + if (LabelDecl == 0) + LabelDecl = new LabelStmt(LabelLoc, LabelII, 0); + + return new GotoStmt(LabelDecl, GotoLoc, LabelLoc); +} + +Action::StmtResult +Sema::ActOnIndirectGotoStmt(SourceLocation GotoLoc,SourceLocation StarLoc, + ExprTy *DestExp) { + // FIXME: Verify that the operand is convertible to void*. + + return new IndirectGotoStmt((Expr*)DestExp); +} + +Action::StmtResult +Sema::ActOnContinueStmt(SourceLocation ContinueLoc, Scope *CurScope) { + Scope *S = CurScope->getContinueParent(); + if (!S) { + // C99 6.8.6.2p1: A break shall appear only in or as a loop body. + Diag(ContinueLoc, diag::err_continue_not_in_loop); + return true; + } + + return new ContinueStmt(ContinueLoc); +} + +Action::StmtResult +Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) { + Scope *S = CurScope->getBreakParent(); + if (!S) { + // C99 6.8.6.3p1: A break shall appear only in or as a switch/loop body. + Diag(BreakLoc, diag::err_break_not_in_loop_or_switch); + return true; + } + + return new BreakStmt(BreakLoc); +} + + +Action::StmtResult +Sema::ActOnReturnStmt(SourceLocation ReturnLoc, ExprTy *rex) { + Expr *RetValExp = static_cast<Expr *>(rex); + QualType FnRetType = CurFunctionDecl ? CurFunctionDecl->getResultType() : + CurMethodDecl->getResultType(); + + if (FnRetType->isVoidType()) { + if (RetValExp) // C99 6.8.6.4p1 (ext_ since GCC warns) + Diag(ReturnLoc, diag::ext_return_has_expr, + (CurFunctionDecl ? CurFunctionDecl->getIdentifier()->getName() : + CurMethodDecl->getSelector().getName()), + RetValExp->getSourceRange()); + return new ReturnStmt(ReturnLoc, RetValExp); + } else { + if (!RetValExp) { + const char *funcName = CurFunctionDecl ? 
+ CurFunctionDecl->getIdentifier()->getName() : + CurMethodDecl->getSelector().getName().c_str(); + if (getLangOptions().C99) // C99 6.8.6.4p1 (ext_ since GCC warns) + Diag(ReturnLoc, diag::ext_return_missing_expr, funcName); + else // C90 6.6.6.4p4 + Diag(ReturnLoc, diag::warn_return_missing_expr, funcName); + return new ReturnStmt(ReturnLoc, (Expr*)0); + } + } + // we have a non-void function with an expression, continue checking + QualType RetValType = RetValExp->getType(); + + // C99 6.8.6.4p3(136): The return statement is not an assignment. The + // overlap restriction of subclause 6.5.16.1 does not apply to the case of + // function return. + AssignConvertType ConvTy = CheckSingleAssignmentConstraints(FnRetType, + RetValExp); + if (DiagnoseAssignmentResult(ConvTy, ReturnLoc, FnRetType, + RetValType, RetValExp, "returning")) + return true; + + if (RetValExp) CheckReturnStackAddr(RetValExp, FnRetType, ReturnLoc); + + return new ReturnStmt(ReturnLoc, (Expr*)RetValExp); +} + +Sema::StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, + bool IsSimple, + bool IsVolatile, + unsigned NumOutputs, + unsigned NumInputs, + std::string *Names, + ExprTy **Constraints, + ExprTy **Exprs, + ExprTy *AsmString, + unsigned NumClobbers, + ExprTy **Clobbers, + SourceLocation RParenLoc) { + Expr *E = (Expr *)AsmString; + + for (unsigned i = 0; i < NumOutputs; i++) { + StringLiteral *Literal = cast<StringLiteral>((Expr *)Constraints[i]); + assert(!Literal->isWide() && + "Output constraint strings should not be wide!"); + + std::string OutputConstraint(Literal->getStrData(), + Literal->getByteLength()); + + TargetInfo::ConstraintInfo info; + if (!Context.Target.validateOutputConstraint(OutputConstraint.c_str(), + info)) { + // FIXME: We currently leak memory here. + Diag(Literal->getLocStart(), + diag::err_invalid_output_constraint_in_asm); + return true; + } + + // Check that the output exprs are valid lvalues. 
+ Expr *OutputExpr = (Expr *)Exprs[i]; + Expr::isLvalueResult Result = OutputExpr->isLvalue(); + if (Result != Expr::LV_Valid) { + ParenExpr *PE = cast<ParenExpr>(OutputExpr); + + Diag(PE->getSubExpr()->getLocStart(), + diag::err_invalid_lvalue_in_asm_output, + PE->getSubExpr()->getSourceRange()); + + // FIXME: We currently leak memory here. + return true; + } + } + + // Inputs are stored after the outputs in Constraints and Exprs, so this + // loop indexes from NumOutputs up to NumOutputs + NumInputs. + for (unsigned i = NumOutputs, e = NumOutputs + NumInputs; i != e; i++) { + StringLiteral *Literal = cast<StringLiteral>((Expr *)Constraints[i]); + assert(!Literal->isWide() && + "Input constraint strings should not be wide!"); + + std::string InputConstraint(Literal->getStrData(), + Literal->getByteLength()); + + TargetInfo::ConstraintInfo info; + if (!Context.Target.validateInputConstraint(InputConstraint.c_str(), + NumOutputs, + info)) { + // FIXME: We currently leak memory here. + Diag(Literal->getLocStart(), + diag::err_invalid_input_constraint_in_asm); + return true; + } + + // Check that the input exprs aren't of type void. + Expr *InputExpr = (Expr *)Exprs[i]; + if (InputExpr->getType()->isVoidType()) { + ParenExpr *PE = cast<ParenExpr>(InputExpr); + + Diag(PE->getSubExpr()->getLocStart(), + diag::err_invalid_type_in_asm_input, + PE->getType().getAsString(), + PE->getSubExpr()->getSourceRange()); + + // FIXME: We currently leak memory here. + return true; + } + } + + // Check that the clobbers are valid. + for (unsigned i = 0; i < NumClobbers; i++) { + StringLiteral *Literal = cast<StringLiteral>((Expr *)Clobbers[i]); + assert(!Literal->isWide() && "Clobber strings should not be wide!"); + + llvm::SmallString<16> Clobber(Literal->getStrData(), + Literal->getStrData() + + Literal->getByteLength()); + + if (!Context.Target.isValidGCCRegisterName(Clobber.c_str())) { + Diag(Literal->getLocStart(), + diag::err_unknown_register_name_in_asm, + Clobber.c_str()); + + // FIXME: We currently leak memory here.
+ return true; + } + } + + return new AsmStmt(AsmLoc, + IsSimple, + IsVolatile, + NumOutputs, + NumInputs, + Names, + reinterpret_cast<StringLiteral**>(Constraints), + reinterpret_cast<Expr**>(Exprs), + cast<StringLiteral>(E), + NumClobbers, + reinterpret_cast<StringLiteral**>(Clobbers), + RParenLoc); +} + +Action::StmtResult +Sema::ActOnObjCAtCatchStmt(SourceLocation AtLoc, + SourceLocation RParen, StmtTy *Parm, + StmtTy *Body, StmtTy *CatchList) { + ObjCAtCatchStmt *CS = new ObjCAtCatchStmt(AtLoc, RParen, + static_cast<Stmt*>(Parm), static_cast<Stmt*>(Body), + static_cast<Stmt*>(CatchList)); + return CatchList ? CatchList : CS; +} + +Action::StmtResult +Sema::ActOnObjCAtFinallyStmt(SourceLocation AtLoc, StmtTy *Body) { + ObjCAtFinallyStmt *FS = new ObjCAtFinallyStmt(AtLoc, + static_cast<Stmt*>(Body)); + return FS; +} + +Action::StmtResult +Sema::ActOnObjCAtTryStmt(SourceLocation AtLoc, + StmtTy *Try, StmtTy *Catch, StmtTy *Finally) { + ObjCAtTryStmt *TS = new ObjCAtTryStmt(AtLoc, static_cast<Stmt*>(Try), + static_cast<Stmt*>(Catch), + static_cast<Stmt*>(Finally)); + return TS; +} + +Action::StmtResult +Sema::ActOnObjCAtThrowStmt(SourceLocation AtLoc, StmtTy *Throw) { + ObjCAtThrowStmt *TS = new ObjCAtThrowStmt(AtLoc, static_cast<Stmt*>(Throw)); + return TS; +} + +Action::StmtResult +Sema::ActOnObjCAtSynchronizedStmt(SourceLocation AtLoc, ExprTy *SynchExpr, + StmtTy *SynchBody) { + ObjCAtSynchronizedStmt *SS = new ObjCAtSynchronizedStmt(AtLoc, + static_cast<Stmt*>(SynchExpr), static_cast<Stmt*>(SynchBody)); + return SS; +} + + diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp new file mode 100644 index 00000000000..f717b4139a5 --- /dev/null +++ b/clang/lib/Sema/SemaType.cpp @@ -0,0 +1,498 @@ +//===--- SemaType.cpp - Semantic Analysis for Types -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements type-related semantic analysis. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Basic/LangOptions.h" +using namespace clang; + +/// ConvertDeclSpecToType - Convert the specified declspec to the appropriate +/// type object. This returns null on error. +QualType Sema::ConvertDeclSpecToType(DeclSpec &DS) { + // FIXME: Should move the logic from DeclSpec::Finish to here for validity + // checking. + QualType Result; + + switch (DS.getTypeSpecType()) { + default: return QualType(); // FIXME: Handle unimp cases! + case DeclSpec::TST_void: return Context.VoidTy; + case DeclSpec::TST_char: + if (DS.getTypeSpecSign() == DeclSpec::TSS_unspecified) + Result = Context.CharTy; + else if (DS.getTypeSpecSign() == DeclSpec::TSS_signed) + Result = Context.SignedCharTy; + else { + assert(DS.getTypeSpecSign() == DeclSpec::TSS_unsigned && + "Unknown TSS value"); + Result = Context.UnsignedCharTy; + } + break; + case DeclSpec::TST_unspecified: // Unspecific typespec defaults to int. 
+ case DeclSpec::TST_int: { + if (DS.getTypeSpecSign() != DeclSpec::TSS_unsigned) { + switch (DS.getTypeSpecWidth()) { + case DeclSpec::TSW_unspecified: Result = Context.IntTy; break; + case DeclSpec::TSW_short: Result = Context.ShortTy; break; + case DeclSpec::TSW_long: Result = Context.LongTy; break; + case DeclSpec::TSW_longlong: Result = Context.LongLongTy; break; + } + } else { + switch (DS.getTypeSpecWidth()) { + case DeclSpec::TSW_unspecified: Result = Context.UnsignedIntTy; break; + case DeclSpec::TSW_short: Result = Context.UnsignedShortTy; break; + case DeclSpec::TSW_long: Result = Context.UnsignedLongTy; break; + case DeclSpec::TSW_longlong: Result =Context.UnsignedLongLongTy; break; + } + } + break; + } + case DeclSpec::TST_float: Result = Context.FloatTy; break; + case DeclSpec::TST_double: + if (DS.getTypeSpecWidth() == DeclSpec::TSW_long) + Result = Context.LongDoubleTy; + else + Result = Context.DoubleTy; + break; + case DeclSpec::TST_bool: Result = Context.BoolTy; break; // _Bool or bool + case DeclSpec::TST_decimal32: // _Decimal32 + case DeclSpec::TST_decimal64: // _Decimal64 + case DeclSpec::TST_decimal128: // _Decimal128 + assert(0 && "FIXME: GNU decimal extensions not supported yet!"); + // The assert vanishes when NDEBUG is defined; return a null type, as the + // default case does, instead of falling through into the tag handling + // below and treating the type rep as a tag decl. + return QualType(); + case DeclSpec::TST_enum: + case DeclSpec::TST_union: + case DeclSpec::TST_struct: { + Decl *D = static_cast<Decl *>(DS.getTypeRep()); + assert(D && "Didn't get a decl for a enum/union/struct?"); + assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 && + DS.getTypeSpecSign() == 0 && + "Can't handle qualifiers on typedef names yet!"); + // TypeQuals handled by caller.
+ Result = Context.getTagDeclType(cast<TagDecl>(D)); + break; + } + case DeclSpec::TST_typedef: { + Decl *D = static_cast<Decl *>(DS.getTypeRep()); + assert(D && "Didn't get a decl for a typedef?"); + assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 && + DS.getTypeSpecSign() == 0 && + "Can't handle qualifiers on typedef names yet!"); + // FIXME: Adding a TST_objcInterface clause doesn't seem ideal, so + // we have this "hack" for now... + if (ObjCInterfaceDecl *ObjCIntDecl = dyn_cast<ObjCInterfaceDecl>(D)) { + if (DS.getProtocolQualifiers() == 0) { + Result = Context.getObjCInterfaceType(ObjCIntDecl); + break; + } + + Action::DeclTy **PPDecl = &(*DS.getProtocolQualifiers())[0]; + Result = Context.getObjCQualifiedInterfaceType(ObjCIntDecl, + reinterpret_cast<ObjCProtocolDecl**>(PPDecl), + DS.getNumProtocolQualifiers()); + break; + } + else if (TypedefDecl *typeDecl = dyn_cast<TypedefDecl>(D)) { + if (Context.getObjCIdType() == Context.getTypedefType(typeDecl) + && DS.getProtocolQualifiers()) { + // id<protocol-list> + Action::DeclTy **PPDecl = &(*DS.getProtocolQualifiers())[0]; + Result = Context.getObjCQualifiedIdType(typeDecl->getUnderlyingType(), + reinterpret_cast<ObjCProtocolDecl**>(PPDecl), + DS.getNumProtocolQualifiers()); + break; + } + } + // TypeQuals handled by caller. + Result = Context.getTypedefType(cast<TypedefDecl>(D)); + break; + } + case DeclSpec::TST_typeofType: + Result = QualType::getFromOpaquePtr(DS.getTypeRep()); + assert(!Result.isNull() && "Didn't get a type for typeof?"); + // TypeQuals handled by caller. + Result = Context.getTypeOfType(Result); + break; + case DeclSpec::TST_typeofExpr: { + Expr *E = static_cast<Expr *>(DS.getTypeRep()); + assert(E && "Didn't get an expression for typeof?"); + // TypeQuals handled by caller. + Result = Context.getTypeOfExpr(E); + break; + } + } + + // Handle complex types. 
+ if (DS.getTypeSpecComplex() == DeclSpec::TSC_complex) + Result = Context.getComplexType(Result); + + assert(DS.getTypeSpecComplex() != DeclSpec::TSC_imaginary && + "FIXME: imaginary types not supported yet!"); + + // See if there are any attributes on the declspec that apply to the type (as + // opposed to the decl). + if (AttributeList *AL = DS.getAttributes()) + DS.SetAttributes(ProcessTypeAttributes(Result, AL)); + + return Result; +} + +/// GetTypeForDeclarator - Convert the type for the specified declarator to Type +/// instances. +QualType Sema::GetTypeForDeclarator(Declarator &D, Scope *S) { + // long long is a C99 feature. + if (!getLangOptions().C99 && !getLangOptions().CPlusPlus0x && + D.getDeclSpec().getTypeSpecWidth() == DeclSpec::TSW_longlong) + Diag(D.getDeclSpec().getTypeSpecWidthLoc(), diag::ext_longlong); + + QualType T = ConvertDeclSpecToType(D.getDeclSpec()); + + // Apply const/volatile/restrict qualifiers to T. + T = T.getQualifiedType(D.getDeclSpec().getTypeQualifiers()); + + // Walk the DeclTypeInfo, building the recursive type as we go. DeclTypeInfos + // are ordered from the identifier out, which is opposite of what we want :). + for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { + DeclaratorChunk &DeclType = D.getTypeObject(e-i-1); + switch (DeclType.Kind) { + default: assert(0 && "Unknown decltype!"); + case DeclaratorChunk::Pointer: + if (T->isReferenceType()) { + // C++ 8.3.2p4: There shall be no ... pointers to references ... + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_pointer_to_reference, + D.getIdentifier() ? D.getIdentifier()->getName() : "type name"); + D.setInvalidType(true); + T = Context.IntTy; + } + + // Apply the pointer typequals to the pointer object. + T = Context.getPointerType(T).getQualifiedType(DeclType.Ptr.TypeQuals); + + // See if there are any attributes on the pointer that apply to it. 
+ if (AttributeList *AL = DeclType.Ptr.AttrList) + DeclType.Ptr.AttrList = ProcessTypeAttributes(T, AL); + + break; + case DeclaratorChunk::Reference: + if (const ReferenceType *RT = T->getAsReferenceType()) { + // C++ 8.3.2p4: There shall be no references to references. + Diag(D.getIdentifierLoc(), + diag::err_illegal_decl_reference_to_reference, + D.getIdentifier() ? D.getIdentifier()->getName() : "type name"); + D.setInvalidType(true); + T = RT->getReferenceeType(); + } + + T = Context.getReferenceType(T); + + // FIXME: Handle Ref.Restrict! + + // See if there are any attributes on the reference that apply to it. + if (AttributeList *AL = DeclType.Ref.AttrList) + DeclType.Ref.AttrList = ProcessTypeAttributes(T, AL); + break; + case DeclaratorChunk::Array: { + const DeclaratorChunk::ArrayTypeInfo &ATI = DeclType.Arr; + Expr *ArraySize = static_cast<Expr*>(ATI.NumElts); + ArrayType::ArraySizeModifier ASM; + if (ATI.isStar) + ASM = ArrayType::Star; + else if (ATI.hasStatic) + ASM = ArrayType::Static; + else + ASM = ArrayType::Normal; + + // C99 6.7.5.2p1: If the element type is an incomplete or function type, + // reject it (e.g. void ary[7], struct foo ary[7], void ary[7]()) + if (T->isIncompleteType()) { + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_incomplete_type, + T.getAsString()); + T = Context.IntTy; + D.setInvalidType(true); + } else if (T->isFunctionType()) { + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_of_functions, + D.getIdentifier() ? D.getIdentifier()->getName() : "type name"); + T = Context.getPointerType(T); + D.setInvalidType(true); + } else if (const ReferenceType *RT = T->getAsReferenceType()) { + // C++ 8.3.2p4: There shall be no ... arrays of references ... + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_of_references, + D.getIdentifier() ?
D.getIdentifier()->getName() : "type name"); + T = RT->getReferenceeType(); + D.setInvalidType(true); + } else if (const RecordType *EltTy = T->getAsRecordType()) { + // If the element type is a struct or union that contains a flexible + // array member, reject it: C99 6.7.2.1p2. + if (EltTy->getDecl()->hasFlexibleArrayMember()) { + Diag(DeclType.Loc, diag::err_flexible_array_in_array, + T.getAsString()); + T = Context.IntTy; + D.setInvalidType(true); + } + } + // C99 6.7.5.2p1: The size expression shall have integer type. + if (ArraySize && !ArraySize->getType()->isIntegerType()) { + Diag(ArraySize->getLocStart(), diag::err_array_size_non_int, + ArraySize->getType().getAsString(), ArraySize->getSourceRange()); + D.setInvalidType(true); + } + llvm::APSInt ConstVal(32); + // If no size expression was provided, build an incomplete array type; if + // the size is not an integer constant expression, build a variable array + // type; otherwise build a constant array type. + if (!ArraySize) { + T = Context.getIncompleteArrayType(T, ASM, ATI.TypeQuals); + } else if (!ArraySize->isIntegerConstantExpr(ConstVal, Context)) { + T = Context.getVariableArrayType(T, ArraySize, ASM, ATI.TypeQuals); + } else { + // C99 6.7.5.2p1: If the expression is a constant expression, it shall + // have a value greater than zero. + if (ConstVal.isSigned()) { + if (ConstVal.isNegative()) { + Diag(ArraySize->getLocStart(), + diag::err_typecheck_negative_array_size, + ArraySize->getSourceRange()); + D.setInvalidType(true); + } else if (ConstVal == 0) { + // GCC accepts zero sized static arrays. + Diag(ArraySize->getLocStart(), diag::ext_typecheck_zero_array_size, + ArraySize->getSourceRange()); + } + } + T = Context.getConstantArrayType(T, ConstVal, ASM, ATI.TypeQuals); + } + // If this is not C99, extwarn about VLA's and C99 array size modifiers. + if (!getLangOptions().C99 && + (ASM != ArrayType::Normal || + (ArraySize && !ArraySize->isIntegerConstantExpr(Context)))) + Diag(D.getIdentifierLoc(), diag::ext_vla); + break; + } + case DeclaratorChunk::Function: + // If the function declarator has a prototype (i.e.
it is not () and + // does not have a K&R-style identifier list), then the arguments are part + // of the type, otherwise the argument list is (). + const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun; + + // C99 6.7.5.3p1: The return type may not be a function or array type. + if (T->isArrayType() || T->isFunctionType()) { + Diag(DeclType.Loc, diag::err_func_returning_array_function, + T.getAsString()); + T = Context.IntTy; + D.setInvalidType(true); + } + + if (!FTI.hasPrototype) { + // Simple void foo(), where the incoming T is the result type. + T = Context.getFunctionTypeNoProto(T); + + // C99 6.7.5.3p3: Reject int(x,y,z) when it's not a function definition. + if (FTI.NumArgs != 0) + Diag(FTI.ArgInfo[0].IdentLoc, diag::err_ident_list_in_fn_declaration); + + } else { + // Otherwise, we have a function with an argument list that is + // potentially variadic. + llvm::SmallVector<QualType, 16> ArgTys; + + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) { + QualType ArgTy = QualType::getFromOpaquePtr(FTI.ArgInfo[i].TypeInfo); + assert(!ArgTy.isNull() && "Couldn't parse type?"); + // + // Perform the default function/array conversion (C99 6.7.5.3p[7,8]). + // This matches the conversion that is done in + // Sema::ActOnParamDeclarator(). Without this conversion, the + // argument type in the function prototype *will not* match the + // type in ParmVarDecl (which makes the code generator unhappy). + // + // FIXME: We still apparently need the conversion in + // Sema::ParseParamDeclarator(). This doesn't make any sense, since + // it should be driving off the type being created here. + // + // FIXME: If a source translation tool needs to see the original type, + // then we need to consider storing both types somewhere... 
+ // + if (const ArrayType *AT = ArgTy->getAsArrayType()) { + // int x[restrict 4] -> int *restrict + ArgTy = Context.getPointerType(AT->getElementType()); + ArgTy = ArgTy.getQualifiedType(AT->getIndexTypeQualifier()); + } else if (ArgTy->isFunctionType()) + ArgTy = Context.getPointerType(ArgTy); + // Look for 'void'. void is allowed only as a single argument to a + // function with no other parameters (C99 6.7.5.3p10). We record + // int(void) as a FunctionTypeProto with an empty argument list. + else if (ArgTy->isVoidType()) { + // If this is something like 'float(int, void)', reject it. 'void' + // is an incomplete type (C99 6.2.5p19) and function decls cannot + // have arguments of incomplete type. + if (FTI.NumArgs != 1 || FTI.isVariadic) { + Diag(DeclType.Loc, diag::err_void_only_param); + ArgTy = Context.IntTy; + FTI.ArgInfo[i].TypeInfo = ArgTy.getAsOpaquePtr(); + } else if (FTI.ArgInfo[i].Ident) { + // Reject, but continue to parse 'int(void abc)'. + Diag(FTI.ArgInfo[i].IdentLoc, + diag::err_param_with_void_type); + ArgTy = Context.IntTy; + FTI.ArgInfo[i].TypeInfo = ArgTy.getAsOpaquePtr(); + } else { + // Reject, but continue to parse 'float(const void)'. + if (ArgTy.getCVRQualifiers()) + Diag(DeclType.Loc, diag::err_void_param_qualified); + + // Do not add 'void' to the ArgTys list. + break; + } + } + + ArgTys.push_back(ArgTy); + } + T = Context.getFunctionType(T, &ArgTys[0], ArgTys.size(), + FTI.isVariadic); + } + break; + } + } + + return T; +} + +/// ObjCGetTypeForMethodDefinition - Builds the type for a method definition +/// declarator +QualType Sema::ObjCGetTypeForMethodDefinition(DeclTy *D) { + ObjCMethodDecl *MDecl = dyn_cast<ObjCMethodDecl>(static_cast<Decl *>(D)); + QualType T = MDecl->getResultType(); + llvm::SmallVector<QualType, 16> ArgTys; + + // Add the first two invisible argument types for self and _cmd. 
+ if (MDecl->isInstance()) { + QualType selfTy = Context.getObjCInterfaceType(MDecl->getClassInterface()); + selfTy = Context.getPointerType(selfTy); + ArgTys.push_back(selfTy); + } + else + ArgTys.push_back(Context.getObjCIdType()); + ArgTys.push_back(Context.getObjCSelType()); + + for (int i = 0; i < MDecl->getNumParams(); i++) { + ParmVarDecl *PDecl = MDecl->getParamDecl(i); + QualType ArgTy = PDecl->getType(); + assert(!ArgTy.isNull() && "Couldn't parse type?"); + // Perform the default function/array conversion (C99 6.7.5.3p[7,8]). + // This matches the conversion that is done in + // Sema::ParseParamDeclarator(). + if (const ArrayType *AT = ArgTy->getAsArrayType()) + ArgTy = Context.getPointerType(AT->getElementType()); + else if (ArgTy->isFunctionType()) + ArgTy = Context.getPointerType(ArgTy); + ArgTys.push_back(ArgTy); + } + T = Context.getFunctionType(T, &ArgTys[0], ArgTys.size(), + MDecl->isVariadic()); + return T; +} + +Sema::TypeResult Sema::ActOnTypeName(Scope *S, Declarator &D) { + // C99 6.7.6: Type names have no identifier. This is already validated by + // the parser. + assert(D.getIdentifier() == 0 && "Type name should have no identifier!"); + + QualType T = GetTypeForDeclarator(D, S); + + assert(!T.isNull() && "GetTypeForDeclarator() returned null type"); + + // In this context, we *do not* check D.getInvalidType(). If the declarator + // type was invalid, GetTypeForDeclarator() still returns a "valid" type, + // though it will not reflect the user specified type. + return T.getAsOpaquePtr(); +} + +// Called from Parser::ParseParenDeclarator(). +Sema::TypeResult Sema::ActOnParamDeclaratorType(Scope *S, Declarator &D) { + // Note: parameters have identifiers, but we don't care about them here, we + // just want the type converted. + QualType T = GetTypeForDeclarator(D, S); + + assert(!T.isNull() && "GetTypeForDeclarator() returned null type"); + + // In this context, we *do not* check D.getInvalidType(). 
If the declarator + // type was invalid, GetTypeForDeclarator() still returns a "valid" type, + // though it will not reflect the user specified type. + return T.getAsOpaquePtr(); +} + +AttributeList *Sema::ProcessTypeAttributes(QualType &Result, AttributeList *AL){ + // Scan through and apply attributes to this type where it makes sense. Some + // attributes (such as __address_space__, __vector_size__, etc) apply to the + // type, but others can be present in the type specifiers even though they + // apply to the decl. Here we apply and delete attributes that apply to the + // type and leave the others alone. + llvm::SmallVector<AttributeList *, 8> LeftOverAttrs; + while (AL) { + // Unlink this attribute from the chain, so we can process it independently. + AttributeList *ThisAttr = AL; + AL = AL->getNext(); + ThisAttr->setNext(0); + + // If this is an attribute we can handle, do so now, otherwise, add it to + // the LeftOverAttrs list for rechaining. + switch (ThisAttr->getKind()) { + default: break; + case AttributeList::AT_address_space: + Result = HandleAddressSpaceTypeAttribute(Result, ThisAttr); + delete ThisAttr; // Consume the attribute. + continue; + } + + LeftOverAttrs.push_back(ThisAttr); + } + + // Rechain any attributes that haven't been deleted to the DeclSpec. + AttributeList *List = 0; + for (unsigned i = 0, e = LeftOverAttrs.size(); i != e; ++i) { + LeftOverAttrs[i]->setNext(List); + List = LeftOverAttrs[i]; + } + + return List; +} + +/// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the +/// specified type. +QualType Sema::HandleAddressSpaceTypeAttribute(QualType Type, + AttributeList *Attr) { + // If this type is already address space qualified, reject it. + // Clause 6.7.3 - Type qualifiers: "No type shall be qualified by qualifiers + // for two or more different address spaces." 
+ if (Type.getAddressSpace()) { + Diag(Attr->getLoc(), diag::err_attribute_address_multiple_qualifiers); + return Type; + } + + // Check the attribute arguments. + if (Attr->getNumArgs() != 1) { + Diag(Attr->getLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return Type; + } + Expr *ASArgExpr = static_cast<Expr *>(Attr->getArg(0)); + llvm::APSInt addrSpace(32); + if (!ASArgExpr->isIntegerConstantExpr(addrSpace, Context)) { + Diag(Attr->getLoc(), diag::err_attribute_address_space_not_int, + ASArgExpr->getSourceRange()); + return Type; + } + + unsigned ASIdx = static_cast<unsigned>(addrSpace.getZExtValue()); + return Context.getASQualType(Type, ASIdx); +} + diff --git a/clang/lib/Sema/SemaUtil.h b/clang/lib/Sema/SemaUtil.h new file mode 100644 index 00000000000..35452b18e6f --- /dev/null +++ b/clang/lib/Sema/SemaUtil.h @@ -0,0 +1,35 @@ +//===--- SemaUtil.h - Utility functions for semantic analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a few static inline functions that are useful for +// performing semantic analysis. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_UTIL_H +#define LLVM_CLANG_SEMA_UTIL_H + +#include "clang/AST/Expr.h" + +namespace clang { + +/// Utility method to determine if a CallExpr is a call to a builtin. +static inline bool isCallBuiltin(CallExpr* cexp) { + Expr* sub = cexp->getCallee()->IgnoreParenCasts(); + + if (DeclRefExpr* E = dyn_cast<DeclRefExpr>(sub)) + if (E->getDecl()->getIdentifier()->getBuiltinID() > 0) + return true; + + return false; +} + +} // end namespace clang + +#endif |