diff options
| author | Rui Ueyama <ruiu@google.com> | 2016-12-05 18:11:35 +0000 |
|---|---|---|
| committer | Rui Ueyama <ruiu@google.com> | 2016-12-05 18:11:35 +0000 |
| commit | fcd3fa83ea2490b9610073da38a91e1b085ff6a4 (patch) | |
| tree | 53b0cbddb0759bff2fc20e308ec2a6d4b3fb51b1 /lld/ELF/ICF.cpp | |
| parent | 941fa7588bbee856a8d3f634c5b88f101089d30f (diff) | |
| download | bcm5719-llvm-fcd3fa83ea2490b9610073da38a91e1b085ff6a4.tar.gz bcm5719-llvm-fcd3fa83ea2490b9610073da38a91e1b085ff6a4.zip | |
Use "equivalence class" instead of "color" to describe the concept in ICF.
Also add a citation to GNU gold safe ICF paper.
Differential Revision: https://reviews.llvm.org/D27398
llvm-svn: 288684
Diffstat (limited to 'lld/ELF/ICF.cpp')
| -rw-r--r-- | lld/ELF/ICF.cpp | 161 |
1 files changed, 84 insertions, 77 deletions
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 002465ed8a8..32cd0f8a185 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// ICF is short for Identical Code Folding. That is a size optimization to +// ICF is short for Identical Code Folding. This is a size optimization to // identify and merge two or more read-only sections (typically functions) // that happened to have the same contents. It usually reduces output size // by a few percent. @@ -26,43 +26,50 @@ // void bar() { foo(); } // // If you merge the two, their relocations point to the same section and -// thus you know they are mergeable, but how do we know they are mergeable -// in the first place? This is not an easy problem to solve. +// thus you know they are mergeable, but how do you know they are +// mergeable in the first place? This is not an easy problem to solve. // -// What we are doing in LLD is some sort of coloring algorithm. +// What we are doing in LLD is to partition sections into equivalence +// classes. Sections in the same equivalence class when the algorithm +// terminates are considered identical. Here are details: // -// We color non-identical sections in different colors repeatedly. -// Sections in the same color when the algorithm terminates are considered -// identical. Here are the details: +// 1. First, we partition sections using their hash values as keys. Hash +// values contain section types, section contents and numbers of +// relocations. During this step, relocation targets are not taken into +// account. We just put sections that apparently differ into different +// equivalence classes. // -// 1. First, we color all sections using their hash values of section -// types, section contents, and numbers of relocations. At this moment, -// relocation targets are not taken into account. We just color -// sections that apparently differ in different colors. +// 2. Next, for each equivalence class, we visit sections to compare +// relocation targets. Relocation targets are considered equivalent if +// their targets are in the same equivalence class. Sections with +// different relocation targets are put into different equivalence +// clases. // -// 2. Next, for each color C, we visit sections in color C to compare -// relocation target colors. We recolor sections A and B in different -// colors if A's and B's relocations are different in terms of target -// colors. +// 3. If we split an equivalence class in step 2, two relocations +// previously target the same equivalence class may now target +// different equivalence classes. Therefore, we repeat step 2 until a +// convergence is obtained. // -// 3. If we recolor some section in step 2, relocations that were -// previously pointing to the same color targets may now be pointing to -// different colors. Therefore, repeat 2 until a convergence is -// obtained. -// -// 4. For each color C, pick an arbitrary section in color C, and merges -// other sections in color C with it. +// 4. For each equivalence class C, pick an arbitrary section in C, and +// merge all the other sections in C with it. // // For small programs, this algorithm needs 3-5 iterations. For large // programs such as Chromium, it takes more than 20 iterations. // +// This algorithm was mentioned as an "optimistic algorithm" in [1], +// though gold implements a different algorithm than this. +// // We parallelize each step so that multiple threads can work on different -// colors concurrently. That gave us a large performance boost when -// applying ICF on large programs. For example, MSVC link.exe or GNU gold -// takes 10-20 seconds to apply ICF on Chromium, whose output size is -// about 1.5 GB, but LLD can finish it in less than 2 seconds on a 2.8 GHz -// 40 core machine. Even without threading, LLD's ICF is still faster than -// MSVC or gold though. +// equivalence classes concurrently. That gave us a large performance +// boost when applying ICF on large programs. For example, MSVC link.exe +// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output +// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a +// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still +// faster than MSVC or gold though. +// +// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding +// in the Gold Linker +// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf // //===----------------------------------------------------------------------===// @@ -103,10 +110,10 @@ private: size_t findBoundary(size_t Begin, size_t End); - void forEachColorRange(size_t Begin, size_t End, + void forEachClassRange(size_t Begin, size_t End, std::function<void(size_t, size_t)> Fn); - void forEachColor(std::function<void(size_t, size_t)> Fn); + void forEachClass(std::function<void(size_t, size_t)> Fn); std::vector<InputSection<ELFT> *> Sections; @@ -116,26 +123,28 @@ private: // The main loop counter. int Cnt = 0; - // We have two locations for colors. On the first iteration of the main - // loop, Color[0] has a valid value, and Color[1] contains garbage. We - // read colors from slot 0 and write to slot 1. So, Color[0] represents - // the current color, and Color[1] represents the next color. On each - // iteration, they switch the roles, so we use them alternately. + // We have two locations for equivalence classes. On the first iteration + // of the main loop, Class[0] has a valid value, and Class[1] contains + // garbage. We read equivalence classes from slot 0 and write to slot 1. + // So, Class[0] represents the current class, and Class[1] represents + // the next class. On each iteration, we switch their roles and use them + // alternately. // // Why are we doing this? Recall that other threads may be working on - // other colors in parallel. They may read colors that we are updating. - // We cannot update colors in place because it breaks the invariance - // that all possibly-identical sections must have the same color at any - // moment. In other words, the for loop to update colors is not an - // atomic operation, and that is observable from other threads. By - // writing new colors to write-only places, we can keep the invariance. + // other equivalence classes in parallel. They may read sections that we + // are updating. We cannot update equivalence classes in place because + // it breaks the invariance that all possibly-identical sections must be + // in the same equivalence class at any moment. In other words, the for + // loop to update equivalence classes is not atomic, and that is + // observable from other threads. By writing new classes to other + // places, we can keep the invariance. // - // Below, `Current` has the index of the current color, and `Next` has - // the index of the next color. If threading is enabled, they are - // either (0, 1) or (1, 0). + // Below, `Current` has the index of the current class, and `Next` has + // the index of the next class. If threading is enabled, they are either + // (0, 1) or (1, 0). // // Note on single-thread: if that's the case, they are always (0, 0) - // because we can safely read next colors without worrying about race + // because we can safely read the next class without worrying about race // conditions. Using the same location makes this algorithm converge // faster because it uses results of the same iteration earlier. int Current = 0; @@ -158,8 +167,7 @@ template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { S->Name != ".init" && S->Name != ".fini"; } -// Split a range into smaller ranges by recoloring sections -// in a given range. +// Split an equivalence class into smaller classes. template <class ELFT> void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { // This loop rearranges sections in [Begin, End) so that all sections @@ -183,10 +191,10 @@ void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { size_t Mid = Bound - Sections.begin(); // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by - // updating the sections in [Begin, End). We use Mid as a color ID - // because every group ends with a unique index. + // updating the sections in [Begin, End). We use Mid as an equivalence + // class ID because every group ends with a unique index. for (size_t I = Begin; I < Mid; ++I) - Sections[I]->Color[Next] = Mid; + Sections[I]->Class[Next] = Mid; // If we created a group, we need to iterate the main loop again. if (Mid != End) @@ -237,7 +245,7 @@ bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, if (&SA == &SB) return true; - // Or, the two sections must have the same color. + // Or, the two sections must be in the same equivalence class. auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); if (!DA || !DB) @@ -250,12 +258,12 @@ bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, if (!X || !Y) return false; - // Ineligible sections have the special color 0. - // They can never be the same in terms of section colors. - if (X->Color[Current] == 0) + // Ineligible sections are in the special equivalence class 0. + // They can never be the same in terms of the equivalence class. + if (X->Class[Current] == 0) return false; - return X->Color[Current] == Y->Color[Current]; + return X->Class[Current] == Y->Class[Current]; }; return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq); @@ -271,21 +279,22 @@ bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, } template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { + uint32_t Class = Sections[Begin]->Class[Current]; for (size_t I = Begin + 1; I < End; ++I) - if (Sections[Begin]->Color[Current] != Sections[I]->Color[Current]) + if (Class != Sections[I]->Class[Current]) return I; return End; } -// Sections in the same color are contiguous in Sections vector. -// Therefore, Sections vector can be considered as contiguous groups -// of sections, grouped by colors. +// Sections in the same equivalence class are contiguous in Sections +// vector. Therefore, Sections vector can be considered as contiguous +// groups of sections, grouped by the class. // // This function calls Fn on every group that starts within [Begin, End). // Note that a group must starts in that range but doesn't necessarily // have to end before End. template <class ELFT> -void ICF<ELFT>::forEachColorRange(size_t Begin, size_t End, +void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, std::function<void(size_t, size_t)> Fn) { if (Begin > 0) Begin = findBoundary(Begin - 1, End); @@ -297,13 +306,13 @@ void ICF<ELFT>::forEachColorRange(size_t Begin, size_t End, } } -// Call Fn on each color group. +// Call Fn on each equivalence class. template <class ELFT> -void ICF<ELFT>::forEachColor(std::function<void(size_t, size_t)> Fn) { +void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { // If threading is disabled or the number of sections are // too small to use threading, call Fn sequentially. if (!Config->Threads || Sections.size() < 1024) { - forEachColorRange(0, Sections.size(), Fn); + forEachClassRange(0, Sections.size(), Fn); ++Cnt; return; } @@ -315,8 +324,8 @@ void ICF<ELFT>::forEachColor(std::function<void(size_t, size_t)> Fn) { size_t NumShards = 256; size_t Step = Sections.size() / NumShards; forLoop(0, NumShards, - [&](size_t I) { forEachColorRange(I * Step, (I + 1) * Step, Fn); }); - forEachColorRange(Step * NumShards, Sections.size(), Fn); + [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + forEachClassRange(Step * NumShards, Sections.size(), Fn); ++Cnt; } @@ -328,34 +337,32 @@ template <class ELFT> void ICF<ELFT>::run() { if (isEligible(S)) Sections.push_back(S); - // Initially, we use hash values to color sections. Therefore, if - // two sections have the same color, they are likely (but not - // guaranteed) to have the same static contents in terms of ICF. + // Initially, we use hash values to partition sections. for (InputSection<ELFT> *S : Sections) - // Set MSB to 1 to avoid collisions with non-hash colors. - S->Color[0] = getHash(S) | (1 << 31); + // Set MSB to 1 to avoid collisions with non-hash IDs. + S->Class[0] = getHash(S) | (1 << 31); - // From now on, sections in Sections are ordered so that sections in - // the same color are consecutive in the vector. + // From now on, sections in Sections vector are ordered so that sections + // in the same equivalence class are consecutive in the vector. std::stable_sort(Sections.begin(), Sections.end(), [](InputSection<ELFT> *A, InputSection<ELFT> *B) { - return A->Color[0] < B->Color[0]; + return A->Class[0] < B->Class[0]; }); // Compare static contents and assign unique IDs for each static content. - forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); + forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); // Split groups by comparing relocations until convergence is obtained. do { Repeat = false; - forEachColor( + forEachClass( [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); } while (Repeat); log("ICF needed " + Twine(Cnt) + " iterations"); - // Merge sections in the same colors. - forEachColor([&](size_t Begin, size_t End) { + // Merge sections by the equivalence class. + forEachClass([&](size_t Begin, size_t End) { if (End - Begin == 1) return; |

