Skip to content

Commit dffa9df

Browse files
committed
[clangd] Shard preamble symbols in dynamic index
Summary: This reduces the dynamic index's memory usage from more than 400MB to 32MB when all files in clang-tools-extra/clangd/*.cpp are active in clangd. Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77732
1 parent 9bd6b77 commit dffa9df

File tree

6 files changed

+385
-165
lines changed

6 files changed

+385
-165
lines changed

clang-tools-extra/clangd/index/Background.cpp

Lines changed: 22 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -61,51 +61,6 @@ namespace clang {
6161
namespace clangd {
6262
namespace {
6363

64-
// Resolves file URIs to file paths, caching each result by URI so URI::resolve runs at most once per distinct URI.
65-
class URIToFileCache {
66-
public:
67-
URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {} // HintPath is forwarded to every URI::resolve call.
68-
69-
llvm::StringRef resolve(llvm::StringRef FileURI) { // Returns the cached path for FileURI, resolving it on first use.
70-
auto I = URIToPathCache.try_emplace(FileURI); // I.second is true only when FileURI was not cached yet.
71-
if (I.second) {
72-
auto Path = URI::resolve(FileURI, HintPath);
73-
if (!Path) {
74-
elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
75-
assert(false && "Failed to resolve URI");
76-
return ""; // Reached only when asserts are compiled out; the empty entry inserted above stays cached.
77-
}
78-
I.first->second = *Path;
79-
}
80-
return I.first->second; // References storage owned by URIToPathCache.
81-
}
82-
83-
private:
84-
std::string HintPath;
85-
llvm::StringMap<std::string> URIToPathCache; // Maps URI string -> resolved path.
86-
};
87-
88-
// We keep only the node "U" and its edges. Any node other than "U" will be
89-
// empty in the resultant graph.
90-
IncludeGraph getSubGraph(const URI &U, const IncludeGraph &FullGraph) {
91-
IncludeGraph IG;
92-
93-
std::string FileURI = U.toString();
94-
auto Entry = IG.try_emplace(FileURI).first;
95-
auto &Node = Entry->getValue();
96-
Node = FullGraph.lookup(Entry->getKey()); // Copies U's node from FullGraph (lookup presumably yields a default node if U is absent).
97-
Node.URI = Entry->getKey(); // Re-point the URI at the key owned by IG rather than FullGraph.
98-
99-
// URIs inside nodes must point into the keys of the same IncludeGraph.
100-
for (auto &Include : Node.DirectIncludes) {
101-
auto I = IG.try_emplace(Include).first; // Adds a default-constructed node for the include if it is not in IG yet.
102-
I->getValue().URI = I->getKey();
103-
Include = I->getKey();
104-
}
105-
106-
return IG;
107-
}
108-
10964
// We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
11065
// relative to Cmd.Directory, which might not be the same as current working
11166
// directory.
@@ -219,108 +174,44 @@ void BackgroundIndex::update(
219174
llvm::StringRef MainFile, IndexFileIn Index,
220175
const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
221176
bool HadErrors) {
222-
// Partition symbols/references into files.
223-
struct File {
224-
llvm::DenseSet<const Symbol *> Symbols;
225-
llvm::DenseSet<const Ref *> Refs;
226-
llvm::DenseSet<const Relation *> Relations;
227-
FileDigest Digest;
228-
};
229-
llvm::StringMap<File> Files;
230-
URIToFileCache URICache(MainFile);
177+
llvm::StringMap<FileDigest> FilesToUpdate;
231178
for (const auto &IndexIt : *Index.Sources) {
232179
const auto &IGN = IndexIt.getValue();
233180
// Note that sources do not contain any information regarding missing
234181
// headers, since we don't even know what absolute path they should fall in.
235-
const auto AbsPath = URICache.resolve(IGN.URI);
182+
auto AbsPath = llvm::cantFail(URI::resolve(IGN.URI, MainFile),
183+
"Failed to resovle URI");
236184
const auto DigestIt = ShardVersionsSnapshot.find(AbsPath);
237185
// File has different contents, or indexing was successful this time.
238186
if (DigestIt == ShardVersionsSnapshot.end() ||
239187
DigestIt->getValue().Digest != IGN.Digest ||
240188
(DigestIt->getValue().HadErrors && !HadErrors))
241-
Files.try_emplace(AbsPath).first->getValue().Digest = IGN.Digest;
242-
}
243-
// This map is used to figure out where to store relations.
244-
llvm::DenseMap<SymbolID, File *> SymbolIDToFile;
245-
for (const auto &Sym : *Index.Symbols) {
246-
if (Sym.CanonicalDeclaration) {
247-
auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
248-
const auto FileIt = Files.find(DeclPath);
249-
if (FileIt != Files.end()) {
250-
FileIt->second.Symbols.insert(&Sym);
251-
SymbolIDToFile[Sym.ID] = &FileIt->second;
252-
}
253-
}
254-
// For symbols with different declaration and definition locations, we store
255-
// the full symbol in both the header file and the implementation file, so
256-
// that merging can tell the preferred symbols (from canonical headers) from
257-
// other symbols (e.g. forward declarations).
258-
if (Sym.Definition &&
259-
Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
260-
auto DefPath = URICache.resolve(Sym.Definition.FileURI);
261-
const auto FileIt = Files.find(DefPath);
262-
if (FileIt != Files.end())
263-
FileIt->second.Symbols.insert(&Sym);
264-
}
265-
}
266-
llvm::DenseMap<const Ref *, SymbolID> RefToIDs;
267-
for (const auto &SymRefs : *Index.Refs) {
268-
for (const auto &R : SymRefs.second) {
269-
auto Path = URICache.resolve(R.Location.FileURI);
270-
const auto FileIt = Files.find(Path);
271-
if (FileIt != Files.end()) {
272-
auto &F = FileIt->getValue();
273-
RefToIDs[&R] = SymRefs.first;
274-
F.Refs.insert(&R);
275-
}
276-
}
277-
}
278-
for (const auto &Rel : *Index.Relations) {
279-
const auto FileIt = SymbolIDToFile.find(Rel.Subject);
280-
if (FileIt != SymbolIDToFile.end())
281-
FileIt->second->Relations.insert(&Rel);
189+
FilesToUpdate[AbsPath] = IGN.Digest;
282190
}
283191

284-
// Build and store new slabs for each updated file.
285-
for (const auto &FileIt : Files) {
286-
llvm::StringRef Path = FileIt.getKey();
287-
SymbolSlab::Builder Syms;
288-
RefSlab::Builder Refs;
289-
RelationSlab::Builder Relations;
290-
for (const auto *S : FileIt.second.Symbols)
291-
Syms.insert(*S);
292-
for (const auto *R : FileIt.second.Refs)
293-
Refs.insert(RefToIDs[R], *R);
294-
for (const auto *Rel : FileIt.second.Relations)
295-
Relations.insert(*Rel);
296-
auto SS = std::make_unique<SymbolSlab>(std::move(Syms).build());
297-
auto RS = std::make_unique<RefSlab>(std::move(Refs).build());
298-
auto RelS = std::make_unique<RelationSlab>(std::move(Relations).build());
299-
auto IG = std::make_unique<IncludeGraph>(
300-
getSubGraph(URI::create(Path), Index.Sources.getValue()));
192+
// Shard slabs into files.
193+
FileShardedIndex ShardedIndex(std::move(Index), MainFile);
301194

302-
// We need to store shards before updating the index, since the latter
303-
// consumes slabs.
304-
// FIXME: Also skip serializing the shard if it is already up-to-date.
305-
BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Path);
306-
IndexFileOut Shard;
307-
Shard.Symbols = SS.get();
308-
Shard.Refs = RS.get();
309-
Shard.Relations = RelS.get();
310-
Shard.Sources = IG.get();
195+
// Build and store new slabs for each updated file.
196+
for (const auto &FileIt : FilesToUpdate) {
197+
PathRef Path = FileIt.first();
198+
auto IF = ShardedIndex.getShard(Path);
311199

312200
// Only store command line hash for main files of the TU, since our
313201
// current model keeps only one version of a header file.
314-
if (Path == MainFile)
315-
Shard.Cmd = Index.Cmd.getPointer();
202+
if (Path != MainFile)
203+
IF.Cmd.reset();
316204

317-
if (auto Error = IndexStorage->storeShard(Path, Shard))
205+
// We need to store shards before updating the index, since the latter
206+
// consumes slabs.
207+
// FIXME: Also skip serializing the shard if it is already up-to-date.
208+
if (auto Error = IndexStorageFactory(Path)->storeShard(Path, IF))
318209
elog("Failed to write background-index shard for file {0}: {1}", Path,
319210
std::move(Error));
320211

321212
{
322213
std::lock_guard<std::mutex> Lock(ShardVersionsMu);
323-
auto Hash = FileIt.second.Digest;
214+
const auto &Hash = FileIt.getValue();
324215
auto DigestIt = ShardVersions.try_emplace(Path);
325216
ShardVersion &SV = DigestIt.first->second;
326217
// Skip if file is already up to date, unless previous index was broken
@@ -333,8 +224,11 @@ void BackgroundIndex::update(
333224
// This can override a newer version that is added in another thread, if
334225
// this thread sees the older version but finishes later. This should be
335226
// rare in practice.
336-
IndexedSymbols.update(Path, std::move(SS), std::move(RS), std::move(RelS),
337-
Path == MainFile);
227+
IndexedSymbols.update(
228+
Path, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)),
229+
std::make_unique<RefSlab>(std::move(*IF.Refs)),
230+
std::make_unique<RelationSlab>(std::move(*IF.Relations)),
231+
Path == MainFile);
338232
}
339233
}
340234
}

0 commit comments

Comments
 (0)