summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRui Ueyama <ruiu@google.com>2019-05-23 09:53:30 +0000
committerRui Ueyama <ruiu@google.com>2019-05-23 09:53:30 +0000
commit42548403133d9b9a22f0f3588eded31f050c1664 (patch)
tree2459f6d34860f787354cd92dae7c1a3252f32fcf
parent77b4f0abb82c419fedca2ae3a20e1201f752b894 (diff)
downloadbcm5719-llvm-42548403133d9b9a22f0f3588eded31f050c1664.tar.gz
bcm5719-llvm-42548403133d9b9a22f0f3588eded31f050c1664.zip
Speed up --start-lib and --end-lib.
--{start,end}-lib give files grouped by the options the archive file semantics. That is, each object file between them acts as if it were in an archive file whose sole member is the file. Therefore, files between --{start,end}-lib are linked to the final output only if they are needed to resolve some undefined symbols. Previously, the feature was implemented this way: 1. We read a symbol table and insert defined symbols to the symbol table as lazy symbols. 2. If an undefind symbol is resolved to a lazy symbol, that lazy symbol instantiate ObjFile class for that symbol, which re-insert all defined symbols to the symbol table. So, if an ObjFile is instantiated, defined symbols are inserted to the symbol table twice. Since inserting long symbol names is not cheap, there's a room to optimize here. This patch optimzies it. Now, LazyObjFile remembers symbol handles and passed them over to a new ObjFile instance, so that the ObjFile doesn't insert the same strings. Here is a quick benchmark to link clang. "Original" is the original lld with unmodified command line options. For "Case 1" and "Case 2", I extracted all files from archive files and replace .a's in a command line with .o's wrapped with --{start,end}-lib. I used the original lld for Case 1" and use this patch for Case 2. Original: 5.892 Case 1: 6.001 (+1.8%) Case 2: 5.701 (-3.2%) So, interestingly, --{start,end}-lib are now faster than the regular linking scheme with archive files. That's perhaps not too surprising, though, because for regular archive files, we look up the symbol table with the same string twice. Differential Revision: https://reviews.llvm.org/D62188 llvm-svn: 361473
-rw-r--r--lld/ELF/InputFiles.cpp162
-rw-r--r--lld/ELF/InputFiles.h4
-rw-r--r--lld/ELF/SymbolTable.cpp1
3 files changed, 108 insertions, 59 deletions
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 6db1217e08f..a82d99683cc 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -913,62 +913,91 @@ StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
}
+// Initialize this->Symbols. this->Symbols is a parallel array as
+// its corresponding ELF symbol table.
template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
- this->Symbols.reserve(this->getELFSyms<ELFT>().size());
- for (const Elf_Sym &Sym : this->getELFSyms<ELFT>())
- this->Symbols.push_back(createSymbol(&Sym));
-}
-
-template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
- uint32_t SecIdx = getSectionIndex(*Sym);
- if (SecIdx >= this->Sections.size())
- fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
-
- InputSectionBase *Sec = this->Sections[SecIdx];
- uint8_t Binding = Sym->getBinding();
- uint8_t StOther = Sym->st_other;
- uint8_t Type = Sym->getType();
- uint64_t Value = Sym->st_value;
- uint64_t Size = Sym->st_size;
-
- if (Binding == STB_LOCAL) {
- if (Sym->getType() == STT_FILE)
- SourceFile = CHECK(Sym->getName(this->StringTable), this);
-
- if (this->StringTable.size() <= Sym->st_name)
- fatal(toString(this) + ": invalid symbol name offset");
-
- StringRefZ Name = this->StringTable.data() + Sym->st_name;
- if (Sym->st_shndx == SHN_UNDEF)
- return make<Undefined>(this, Name, Binding, StOther, Type);
- return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
- }
+ ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
+ this->Symbols.resize(ESyms.size());
+
+ // Our symbol table may have already been partially initialized
+ // because of LazyObjFile.
+ for (size_t I = 0, End = ESyms.size(); I != End; ++I)
+ if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
+ this->Symbols[I] =
+ Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
+
+ // Fill this->Symbols. A symbol is either local or global.
+ for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
+ const Elf_Sym &ESym = ESyms[I];
+
+ // Read symbol attributes.
+ uint32_t SecIdx = getSectionIndex(ESym);
+ if (SecIdx >= this->Sections.size())
+ fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
+
+ InputSectionBase *Sec = this->Sections[SecIdx];
+ uint8_t Binding = ESym.getBinding();
+ uint8_t StOther = ESym.st_other;
+ uint8_t Type = ESym.getType();
+ uint64_t Value = ESym.st_value;
+ uint64_t Size = ESym.st_size;
+ StringRefZ Name = this->StringTable.data() + ESym.st_name;
+
+ // Handle local symbols. Local symbols are not added to the symbol
+ // table because they are not visible from other object files. We
+ // allocate symbol instances and add their pointers to Symbols.
+ if (Binding == STB_LOCAL) {
+ if (ESym.getType() == STT_FILE)
+ SourceFile = CHECK(ESym.getName(this->StringTable), this);
+
+ if (this->StringTable.size() <= ESym.st_name)
+ fatal(toString(this) + ": invalid symbol name offset");
+
+ if (ESym.st_shndx == SHN_UNDEF)
+ this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
+ else
+ this->Symbols[I] =
+ make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
+ continue;
+ }
- StringRef Name = CHECK(Sym->getName(this->StringTable), this);
+ // Handle global undefined symbols.
+ if (ESym.st_shndx == SHN_UNDEF) {
+ resolveSymbol(this->Symbols[I],
+ Undefined{this, Name, Binding, StOther, Type});
+ continue;
+ }
- if (Sym->st_shndx == SHN_UNDEF)
- return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type});
+ // Handle global common symbols.
+ if (ESym.st_shndx == SHN_COMMON) {
+ if (Value == 0 || Value >= UINT32_MAX)
+ fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
+ "' has invalid alignment: " + Twine(Value));
+ resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
+ Type, Value, Size});
+ continue;
+ }
- if (Sec == &InputSection::Discarded)
- return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type,
- /*DiscardedSecIdx=*/SecIdx});
+ // If a defined symbol is in a discarded section, handle it as if it
+ // were an undefined symbol. Such symbol doesn't comply with the
+ // standard, but in practice, a .eh_frame often directly refer
+ // COMDAT member sections, and if a comdat group is discarded, some
+ // defined symbol in a .eh_frame becomes dangling symbols.
+ if (Sec == &InputSection::Discarded) {
+ resolveSymbol(this->Symbols[I],
+ Undefined{this, Name, Binding, StOther, Type, SecIdx});
+ continue;
+ }
- if (Sym->st_shndx == SHN_COMMON) {
- if (Value == 0 || Value >= UINT32_MAX)
- fatal(toString(this) + ": common symbol '" + Name +
- "' has invalid alignment: " + Twine(Value));
- return Symtab->addSymbol(
- CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
- }
+ // Handle global defined symbols.
+ if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
+ Binding == STB_GNU_UNIQUE) {
+ resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
+ Type, Value, Size, Sec});
+ continue;
+ }
- switch (Binding) {
- default:
fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
- case STB_GLOBAL:
- case STB_WEAK:
- case STB_GNU_UNIQUE:
- return Symtab->addSymbol(
- Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
}
}
@@ -1455,10 +1484,16 @@ InputFile *LazyObjFile::fetch() {
InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
File->GroupId = GroupId;
+
+ // Copy symbol vector so that the new InputFile doesn't have to
+ // insert the same defined symbols to the symbol table again.
+ File->Symbols = std::move(Symbols);
return File;
}
template <class ELFT> void LazyObjFile::parse() {
+ using Elf_Sym = typename ELFT::Sym;
+
// A lazy object file wraps either a bitcode file or an ELF file.
if (isBitcode(this->MB)) {
std::unique_ptr<lto::InputFile> Obj =
@@ -1476,6 +1511,7 @@ template <class ELFT> void LazyObjFile::parse() {
return;
}
+ // Find a symbol table.
ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
@@ -1483,16 +1519,28 @@ template <class ELFT> void LazyObjFile::parse() {
if (Sec.sh_type != SHT_SYMTAB)
continue;
- typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
+ // A symbol table is found.
+ ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
uint32_t FirstGlobal = Sec.sh_info;
- StringRef StringTable =
- CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
-
- for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) {
- if (Sym.st_shndx == SHN_UNDEF)
+ StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
+ this->Symbols.resize(ESyms.size());
+
+ // Get existing symbols or insert placeholder symbols.
+ for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
+ if (ESyms[I].st_shndx != SHN_UNDEF)
+ this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
+
+ // Replace existing symbols with LazyObject symbols.
+ //
+ // resolveSymbol() may trigger this->fetch() if an existing symbol
+ // is an undefined symbol. If that happens, this LazyObjFile has
+ // served its purpose, and we can exit from the loop early.
+ for (Symbol *Sym : this->Symbols) {
+ if (!Sym)
continue;
- Symtab->addSymbol(
- LazyObject{*this, CHECK(Sym.getName(StringTable), this)});
+ resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
+ if (AddedToLink)
+ return;
}
return;
}
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index f3d9241b5db..7d5e9a2346f 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -139,10 +139,11 @@ public:
// Index of MIPS GOT built for this file.
llvm::Optional<size_t> MipsGotIndex;
+ std::vector<Symbol *> Symbols;
+
protected:
InputFile(Kind K, MemoryBufferRef M);
std::vector<InputSectionBase *> Sections;
- std::vector<Symbol *> Symbols;
private:
const Kind FileKind;
@@ -255,7 +256,6 @@ private:
StringRef getSectionName(const Elf_Shdr &Sec);
bool shouldMerge(const Elf_Shdr &Sec);
- Symbol *createSymbol(const Elf_Sym *Sym);
// Each ELF symbol contains a section index which the symbol belongs to.
// However, because the number of bits dedicated for that is limited, a
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 44fdb96b3e0..c77243e010d 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -92,6 +92,7 @@ Symbol *SymbolTable::insert(StringRef Name) {
Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
SymVector.push_back(Sym);
+ Sym->setName(Name);
Sym->SymbolKind = Symbol::PlaceholderKind;
Sym->VersionId = Config->DefaultSymbolVersion;
Sym->Visibility = STV_DEFAULT;
OpenPOWER on IntegriCloud