summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lld/COFF/Chunks.cpp42
-rw-r--r--lld/COFF/Chunks.h32
-rw-r--r--lld/COFF/ICF.cpp6
-rw-r--r--lld/COFF/InputFiles.cpp15
-rw-r--r--lld/COFF/InputFiles.h3
-rw-r--r--lld/COFF/Writer.cpp4
-rw-r--r--lld/test/COFF/string-tail-merge.s87
7 files changed, 184 insertions, 5 deletions
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 6da121ef40c..451e18a600b 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -571,5 +571,47 @@ uint8_t Baserel::getDefaultType() {
}
}
+std::map<uint32_t, MergeChunk *> MergeChunk::Instances;
+
+// Creates a merge chunk for sections with the given alignment. The string
+// table builder is constructed in RAW mode with that same alignment.
+MergeChunk::MergeChunk(uint32_t Alignment)
+ : Builder(StringTableBuilder::RAW, Alignment) {
+ // Also record the alignment in this chunk's own Alignment field.
+ this->Alignment = Alignment;
+}
+
+// Registers C with the MergeChunk responsible for C's alignment. The
+// MergeChunk for a given alignment is created the first time a section with
+// that alignment is added.
+void MergeChunk::addSection(SectionChunk *C) {
+  MergeChunk *&Slot = Instances[C->Alignment];
+  if (Slot == nullptr)
+    Slot = make<MergeChunk>(C->Alignment);
+  Slot->Sections.push_back(C);
+}
+
+// Builds the merged string table from the live member sections, then points
+// each live member section at its location inside this chunk.
+void MergeChunk::finalizeContents() {
+  // First pass: feed the contents of every live section to the builder. All
+  // strings must be added before the builder is finalized.
+  for (SectionChunk *Sec : Sections) {
+    if (!Sec->isLive())
+      continue;
+    Builder.add(toStringRef(Sec->getContents()));
+  }
+  Builder.finalize();
+
+  // Second pass: look up where each live section's contents ended up and
+  // derive its output section, RVA and section offset from this chunk's.
+  for (SectionChunk *Sec : Sections) {
+    if (Sec->isLive()) {
+      size_t StrOff = Builder.getOffset(toStringRef(Sec->getContents()));
+      Sec->setOutputSection(Out);
+      Sec->setRVA(RVA + StrOff);
+      Sec->OutputSectionOff = OutputSectionOff + StrOff;
+    }
+  }
+}
+
+// Reports the section characteristics for this chunk: readable, initialized
+// data.
+uint32_t MergeChunk::getPermissions() const {
+  uint32_t Perms = IMAGE_SCN_MEM_READ | IMAGE_SCN_CNT_INITIALIZED_DATA;
+  return Perms;
+}
+
+// Returns the size reported by the string table builder.
+size_t MergeChunk::getSize() const {
+ return Builder.getSize();
+}
+
+// Writes the merged string table into the output buffer. Buf is advanced by
+// this chunk's OutputSectionOff to find the destination.
+void MergeChunk::writeTo(uint8_t *Buf) const {
+  uint8_t *Dest = Buf + OutputSectionOff;
+  Builder.write(Dest);
+}
+
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index b4b22997fea..b95869a30d2 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/COFF.h"
#include <utility>
#include <vector>
@@ -60,6 +61,10 @@ public:
// before calling this function.
virtual void writeTo(uint8_t *Buf) const {}
+ // Called by the writer after an RVA is assigned, but before calling
+ // getSize(). The default implementation does nothing; MergeChunk overrides
+ // it to build its merged contents.
+ virtual void finalizeContents() {}
+
// The writer sets and uses the addresses.
uint64_t getRVA() const { return RVA; }
void setRVA(uint64_t V) { RVA = V; }
@@ -222,6 +227,33 @@ private:
uint32_t Class[2] = {0, 0};
};
+// This class is used to implement an lld-specific feature (not implemented in
+// MSVC) that minimizes the output size by finding string literals sharing tail
+// parts and merging them.
+//
+// If string tail merging is enabled and a section is identified as containing a
+// string literal, it is added to a MergeChunk with an appropriate alignment.
+// The MergeChunk then tail merges the strings using the StringTableBuilder
+// class and assigns RVAs and section offsets to each of the member chunks based
+// on the offsets assigned by the StringTableBuilder.
+class MergeChunk : public Chunk {
+public:
+  // Explicit so that a plain integer is never silently converted into a
+  // MergeChunk.
+  explicit MergeChunk(uint32_t Alignment);
+
+  // Adds C to the MergeChunk instance for C's alignment, creating that
+  // instance if it does not exist yet.
+  static void addSection(SectionChunk *C);
+
+  // Tail merges the contents of the live member sections and assigns each of
+  // them an RVA and output section offset within this chunk.
+  void finalizeContents() override;
+
+  uint32_t getPermissions() const override;
+  StringRef getSectionName() const override { return ".rdata"; }
+  size_t getSize() const override;
+  void writeTo(uint8_t *Buf) const override;
+
+  // Every MergeChunk created so far, keyed by alignment.
+  static std::map<uint32_t, MergeChunk *> Instances;
+
+  // The sections that were added to this chunk through addSection().
+  std::vector<SectionChunk *> Sections;
+
+private:
+  // Performs the actual merging and records each string's offset.
+  llvm::StringTableBuilder Builder;
+};
+
// A chunk for common symbols. Common chunks don't have actual data.
class CommonChunk : public Chunk {
public:
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index c063ab28edc..2f979327b82 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -224,6 +224,12 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
}
}
+ // Make sure that ICF doesn't merge sections that are being handled by string
+ // tail merging.
+ for (auto &P : MergeChunk::Instances)
+ for (SectionChunk *SC : P.second->Sections)
+ SC->Class[0] = NextId++;
+
// Initially, we use hash values to partition sections.
for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
// Set MSB to 1 to avoid collisions with non-hash classs.
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index b60e8c8e765..78bfe34088f 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -138,12 +138,13 @@ void ObjFile::initializeChunks() {
if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
SparseChunks[I] = PendingComdat;
else
- SparseChunks[I] = readSection(I, nullptr);
+ SparseChunks[I] = readSection(I, nullptr, "");
}
}
SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
- const coff_aux_section_definition *Def) {
+ const coff_aux_section_definition *Def,
+ StringRef LeaderName) {
const coff_section *Sec;
StringRef Name;
if (auto EC = COFFObj->getSection(SectionNumber, Sec))
@@ -189,6 +190,12 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
GuardLJmpChunks.push_back(C);
else if (Name == ".sxdata")
SXDataChunks.push_back(C);
+ else if (Config->DoICF && Sec->NumberOfRelocations == 0 && Name == ".rdata" &&
+ LeaderName.startswith("??_C@"))
+ // COFF sections that look like string literal sections (i.e. no
+ // relocations, in .rdata, leader symbol name matches the MSVC name mangling
+ // for string literals) are subject to string tail merging.
+ MergeChunk::addSection(C);
else
Chunks.push_back(C);
@@ -209,7 +216,7 @@ void ObjFile::readAssociativeDefinition(
// the section; otherwise mark it as discarded.
int32_t SectionNumber = Sym.getSectionNumber();
if (Parent) {
- SparseChunks[SectionNumber] = readSection(SectionNumber, Def);
+ SparseChunks[SectionNumber] = readSection(SectionNumber, Def, "");
if (SparseChunks[SectionNumber])
Parent->addAssociative(SparseChunks[SectionNumber]);
} else {
@@ -343,7 +350,7 @@ Optional<Symbol *> ObjFile::createDefined(
Prevailing = true;
}
if (Prevailing) {
- SectionChunk *C = readSection(SectionNumber, Def);
+ SectionChunk *C = readSection(SectionNumber, Def, Name);
SparseChunks[SectionNumber] = C;
C->Sym = cast<DefinedRegular>(Leader);
cast<DefinedRegular>(Leader)->Data = &C->Repl;
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 000fdc69d7a..3ee578030fd 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -150,7 +150,8 @@ private:
SectionChunk *
readSection(uint32_t SectionNumber,
- const llvm::object::coff_aux_section_definition *Def);
+ const llvm::object::coff_aux_section_definition *Def,
+ StringRef LeaderName);
void readAssociativeDefinition(
COFFSymbolRef COFFSym,
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 7e1e80198f2..19c0c39138a 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -426,6 +426,9 @@ void Writer::createSections() {
void Writer::createMiscChunks() {
OutputSection *RData = createSection(".rdata");
+ for (auto &P : MergeChunk::Instances)
+ RData->addChunk(P.second);
+
// Create thunks for locally-dllimported symbols.
if (!Symtab->LocalImportChunks.empty()) {
for (Chunk *C : Symtab->LocalImportChunks)
@@ -665,6 +668,7 @@ void Writer::assignAddresses() {
VirtualSize = alignTo(VirtualSize, C->Alignment);
C->setRVA(RVA + VirtualSize);
C->OutputSectionOff = VirtualSize;
+ C->finalizeContents();
VirtualSize += C->getSize();
if (C->hasData())
RawSize = alignTo(VirtualSize, SectorSize);
diff --git a/lld/test/COFF/string-tail-merge.s b/lld/test/COFF/string-tail-merge.s
new file mode 100644
index 00000000000..f55041f5c55
--- /dev/null
+++ b/lld/test/COFF/string-tail-merge.s
@@ -0,0 +1,87 @@
+# Tests string tail merging in the COFF linker: string literal sections whose
+# contents are the tail of another string literal are expected to share
+# storage in the output .rdata section.
+# REQUIRES: x86
+# RUN: llvm-mc -triple=x86_64-windows-msvc -filetype=obj -o %t.obj %s
+# RUN: lld-link %t.obj /out:%t.exe /entry:main /subsystem:console
+# RUN: llvm-objdump -s %t.exe | FileCheck %s
+
+# Expected .rdata dump. The addresses referenced from .text below show where
+# each symbol is expected to land inside this dump.
+# CHECK: Contents of section .rdata:
+# CHECK-NEXT: 140002000 68656c6c 6f20776f 726c6400 6fa26ca4 hello world.o.l.
+# CHECK-NEXT: 140002010 0068656c 6c6f2077 6f726c64 00006865 .hello world..he
+# CHECK-NEXT: 140002020 6c6c6f20 776f726c 64006800 65006c00 llo world.h.e.l.
+# CHECK-NEXT: 140002030 6c006f00 20007700 6f007200 6c006400 l.o. .w.o.r.l.d.
+# CHECK-NEXT: 140002040 0000 ..
+
+# The .text section holds only the 8-byte addresses of the symbols under test,
+# so the expected dump of .text shows the address each symbol resolved to.
+# CHECK: Contents of section .text:
+.globl main
+main:
+# Narrow strings: "hello world" at 0x140002011 and "world" at 0x140002017,
+# i.e. inside the same copy.
+# CHECK-NEXT: 140003000 11200040 01000000 17200040 01000000
+.8byte "??_C@_0M@LACCCNMM@hello?5world?$AA@"
+.8byte "??_C@_05MCBCHHEJ@world?$AA@"
+# Wide strings: "hello world" at 0x14000202a and "world" at 0x140002036,
+# i.e. inside the same copy.
+# CHECK-NEXT: 140003010 2a200040 01000000 36200040 01000000
+.8byte "??_C@_1BI@HHJHKLLN@?$AAh?$AAe?$AAl?$AAl?$AAo?$AA?5?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+.8byte "??_C@_1M@NBBDDHIO@?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+# These two keep their own storage at 0x140002000 and 0x14000200c.
+# CHECK-NEXT: 140003020 00200040 01000000 0c200040 01000000
+.8byte "??_D@not_a_string_literal"
+.8byte "??_C@string_literal_with_relocs"
+# The first stays outside .rdata at 0x140001000; the second gets its own copy
+# at 0x14000201e.
+# CHECK-NEXT: 140003030 00100040 01000000 1e200040 01000000
+.8byte "??_C@string_literal_in_wrong_section"
+.8byte "??_C@overaligned_string_literal"
+
+# Narrow string "hello world" in a COMDAT .rdata section with a ??_C@ leader.
+.section .rdata,"dr",discard,"??_C@_0M@LACCCNMM@hello?5world?$AA@"
+.globl "??_C@_0M@LACCCNMM@hello?5world?$AA@"
+"??_C@_0M@LACCCNMM@hello?5world?$AA@":
+.asciz "hello world"
+
+# Narrow string "world", which is a tail of "hello world".
+.section .rdata,"dr",discard,"??_C@_05MCBCHHEJ@world?$AA@"
+.globl "??_C@_05MCBCHHEJ@world?$AA@"
+"??_C@_05MCBCHHEJ@world?$AA@":
+.asciz "world"
+
+# Wide (2-byte character, 2-byte aligned) string "hello world".
+.section .rdata,"dr",discard,"??_C@_1BI@HHJHKLLN@?$AAh?$AAe?$AAl?$AAl?$AAo?$AA?5?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+.globl "??_C@_1BI@HHJHKLLN@?$AAh?$AAe?$AAl?$AAl?$AAo?$AA?5?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+.p2align 1
+"??_C@_1BI@HHJHKLLN@?$AAh?$AAe?$AAl?$AAl?$AAo?$AA?5?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@":
+.short 104
+.short 101
+.short 108
+.short 108
+.short 111
+.short 32
+.short 119
+.short 111
+.short 114
+.short 108
+.short 100
+.short 0
+
+# Wide string "world", which is a tail of the wide "hello world".
+.section .rdata,"dr",discard,"??_C@_1M@NBBDDHIO@?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+.globl "??_C@_1M@NBBDDHIO@?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@"
+.p2align 1
+"??_C@_1M@NBBDDHIO@?$AAw?$AAo?$AAr?$AAl?$AAd?$AA?$AA@":
+.short 119
+.short 111
+.short 114
+.short 108
+.short 100
+.short 0
+
+# Has the ??_C@ name prefix but lives in .data rather than .rdata; expected to
+# stay in its own section.
+.section .data,"drw",discard,"??_C@string_literal_in_wrong_section"
+.globl "??_C@string_literal_in_wrong_section"
+"??_C@string_literal_in_wrong_section":
+.asciz "hello world"
+
+# Lives in .rdata but the leader symbol uses the ??_D@ prefix, not ??_C@;
+# expected to keep its own copy of the bytes.
+.section .rdata,"dr",discard,"??_D@not_a_string_literal"
+.globl "??_D@not_a_string_literal"
+"??_D@not_a_string_literal":
+.asciz "hello world"
+
+# Has the ??_C@ prefix and lives in .rdata, but its contents carry a
+# relocation against main; expected to keep its own storage.
+.section .rdata,"dr",discard,"??_C@string_literal_with_relocs"
+.globl "??_C@string_literal_with_relocs"
+"??_C@string_literal_with_relocs":
+.4byte main + 111 + (114 << 8) + (108 << 16) + (100 << 24) # main + "orld"
+.byte 0
+
+# Narrow string with 2-byte alignment; expected to get a copy separate from
+# the 1-byte-aligned "hello world".
+.section .rdata,"dr",discard,"??_C@overaligned_string_literal"
+.globl "??_C@overaligned_string_literal"
+.p2align 1
+"??_C@overaligned_string_literal":
+.asciz "hello world"
OpenPOWER on IntegriCloud