clang 20.0.0 (based on r547379) from build 12806354. Bug: http://b/379133546 Test: N/A Change-Id: I2eb8938af55d809de674be63cb30cf27e801862b Upstream-Commit: ad834e67b1105d15ef907f6255d4c96e8e733f57
488 lines
19 KiB
C++
488 lines
19 KiB
C++
//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
|
|
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
|
|
|
|
#include "clang/Basic/LLVM.h"
|
|
#include "clang/Lex/DependencyDirectivesScanner.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Support/Allocator.h"
|
|
#include "llvm/Support/ErrorOr.h"
|
|
#include "llvm/Support/VirtualFileSystem.h"
|
|
#include <mutex>
|
|
#include <optional>
|
|
|
|
namespace clang {
|
|
namespace tooling {
|
|
namespace dependencies {
|
|
|
|
/// List of \c dependency_directives_scan::Directive values, held in a
/// \c SmallVector with room for 20 elements inline.
using DependencyDirectivesTy =
|
|
SmallVector<dependency_directives_scan::Directive, 20>;
|
|
|
|
/// Contents and directive tokens of a cached file entry. Single instance can
|
|
/// be shared between multiple entries.
|
|
struct CachedFileContents {
|
|
CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
|
|
: Original(std::move(Contents)), DepDirectives(nullptr) {}
|
|
|
|
/// Owning storage for the original contents.
|
|
std::unique_ptr<llvm::MemoryBuffer> Original;
|
|
|
|
/// The mutex that must be locked before mutating directive tokens.
|
|
std::mutex ValueLock;
|
|
SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
|
|
/// Accessor to the directive tokens that's atomic to avoid data races.
|
|
/// \p CachedFileContents has ownership of the pointer.
|
|
std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
|
|
|
|
~CachedFileContents() { delete DepDirectives.load(); }
|
|
};
|
|
|
|
/// An in-memory representation of a file system entity that is of interest to
|
|
/// the dependency scanning filesystem.
|
|
///
|
|
/// It represents one of the following:
|
|
/// - opened file with contents and a stat value,
|
|
/// - opened file with contents, directive tokens and a stat value,
|
|
/// - directory entry with its stat value,
|
|
/// - filesystem error.
|
|
///
|
|
/// Single instance of this class can be shared across different filenames (e.g.
|
|
/// a regular file and a symlink). For this reason the status filename is empty
|
|
/// and is only materialized by \c EntryRef that knows the requested filename.
|
|
class CachedFileSystemEntry {
|
|
public:
|
|
/// Creates an entry without contents: either a filesystem error or
|
|
/// a directory with stat value.
|
|
CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
|
|
: MaybeStat(std::move(Stat)), Contents(nullptr) {
|
|
clearStatName();
|
|
}
|
|
|
|
/// Creates an entry representing a file with contents.
|
|
CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
|
|
CachedFileContents *Contents)
|
|
: MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
|
|
clearStatName();
|
|
}
|
|
|
|
/// \returns True if the entry is a filesystem error.
|
|
bool isError() const { return !MaybeStat; }
|
|
|
|
/// \returns True if the current entry represents a directory.
|
|
bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
|
|
|
|
/// \returns Original contents of the file.
|
|
StringRef getOriginalContents() const {
|
|
assert(!isError() && "error");
|
|
assert(!MaybeStat->isDirectory() && "not a file");
|
|
assert(Contents && "contents not initialized");
|
|
return Contents->Original->getBuffer();
|
|
}
|
|
|
|
/// \returns The scanned preprocessor directive tokens of the file that are
|
|
/// used to speed up preprocessing, if available.
|
|
std::optional<ArrayRef<dependency_directives_scan::Directive>>
|
|
getDirectiveTokens() const {
|
|
assert(!isError() && "error");
|
|
assert(!isDirectory() && "not a file");
|
|
assert(Contents && "contents not initialized");
|
|
if (auto *Directives = Contents->DepDirectives.load()) {
|
|
if (Directives->has_value())
|
|
return ArrayRef<dependency_directives_scan::Directive>(**Directives);
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
/// \returns The error.
|
|
std::error_code getError() const { return MaybeStat.getError(); }
|
|
|
|
/// \returns The entry status with empty filename.
|
|
llvm::vfs::Status getStatus() const {
|
|
assert(!isError() && "error");
|
|
assert(MaybeStat->getName().empty() && "stat name must be empty");
|
|
return *MaybeStat;
|
|
}
|
|
|
|
/// \returns The unique ID of the entry.
|
|
llvm::sys::fs::UniqueID getUniqueID() const {
|
|
assert(!isError() && "error");
|
|
return MaybeStat->getUniqueID();
|
|
}
|
|
|
|
/// \returns The data structure holding both contents and directive tokens.
|
|
CachedFileContents *getCachedContents() const {
|
|
assert(!isError() && "error");
|
|
assert(!isDirectory() && "not a file");
|
|
return Contents;
|
|
}
|
|
|
|
private:
|
|
void clearStatName() {
|
|
if (MaybeStat)
|
|
MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
|
|
}
|
|
|
|
/// Either the filesystem error or status of the entry.
|
|
/// The filename is empty and only materialized by \c EntryRef.
|
|
llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
|
|
|
|
/// Non-owning pointer to the file contents.
|
|
///
|
|
/// We're using pointer here to keep the size of this class small. Instances
|
|
/// representing directories and filesystem errors don't hold any contents
|
|
/// anyway.
|
|
CachedFileContents *Contents;
|
|
};
|
|
|
|
/// A cached real-path result: either the path as a string or an error code.
using CachedRealPath = llvm::ErrorOr<std::string>;
|
|
|
|
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
|
|
/// underlying real file system, and the scanned preprocessor directives of
|
|
/// files.
|
|
///
|
|
/// It is sharded based on the hash of the key to reduce the lock contention for
|
|
/// the worker threads.
|
|
class DependencyScanningFilesystemSharedCache {
|
|
public:
|
|
struct CacheShard {
|
|
/// The mutex that needs to be locked before mutation of any member.
|
|
mutable std::mutex CacheLock;
|
|
|
|
/// Map from filenames to cached entries and real paths.
|
|
llvm::StringMap<
|
|
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
|
|
llvm::BumpPtrAllocator>
|
|
CacheByFilename;
|
|
|
|
/// Map from unique IDs to cached entries.
|
|
llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
|
|
EntriesByUID;
|
|
|
|
/// The backing storage for cached entries.
|
|
llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
|
|
|
|
/// The backing storage for cached contents.
|
|
llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
|
|
|
|
/// The backing storage for cached real paths.
|
|
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
|
|
|
|
/// Returns entry associated with the filename or nullptr if none is found.
|
|
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
|
|
|
|
/// Returns entry associated with the unique ID or nullptr if none is found.
|
|
const CachedFileSystemEntry *
|
|
findEntryByUID(llvm::sys::fs::UniqueID UID) const;
|
|
|
|
/// Returns entry associated with the filename if there is some. Otherwise,
|
|
/// constructs new one with the given status, associates it with the
|
|
/// filename and returns the result.
|
|
const CachedFileSystemEntry &
|
|
getOrEmplaceEntryForFilename(StringRef Filename,
|
|
llvm::ErrorOr<llvm::vfs::Status> Stat);
|
|
|
|
/// Returns entry associated with the unique ID if there is some. Otherwise,
|
|
/// constructs new one with the given status and contents, associates it
|
|
/// with the unique ID and returns the result.
|
|
const CachedFileSystemEntry &
|
|
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
|
|
std::unique_ptr<llvm::MemoryBuffer> Contents);
|
|
|
|
/// Returns entry associated with the filename if there is some. Otherwise,
|
|
/// associates the given entry with the filename and returns it.
|
|
const CachedFileSystemEntry &
|
|
getOrInsertEntryForFilename(StringRef Filename,
|
|
const CachedFileSystemEntry &Entry);
|
|
|
|
/// Returns the real path associated with the filename or nullptr if none is
|
|
/// found.
|
|
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
|
|
|
|
/// Returns the real path associated with the filename if there is some.
|
|
/// Otherwise, constructs new one with the given one, associates it with the
|
|
/// filename and returns the result.
|
|
const CachedRealPath &
|
|
getOrEmplaceRealPathForFilename(StringRef Filename,
|
|
llvm::ErrorOr<StringRef> RealPath);
|
|
};
|
|
|
|
DependencyScanningFilesystemSharedCache();
|
|
|
|
/// Returns shard for the given key.
|
|
CacheShard &getShardForFilename(StringRef Filename) const;
|
|
CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
|
|
|
|
private:
|
|
std::unique_ptr<CacheShard[]> CacheShards;
|
|
unsigned NumShards;
|
|
};
|
|
|
|
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
|
|
/// underlying real file system.
|
|
class DependencyScanningFilesystemLocalCache {
|
|
llvm::StringMap<
|
|
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
|
|
llvm::BumpPtrAllocator>
|
|
Cache;
|
|
|
|
public:
|
|
/// Returns entry associated with the filename or nullptr if none is found.
|
|
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
|
|
assert(llvm::sys::path::is_absolute_gnu(Filename));
|
|
auto It = Cache.find(Filename);
|
|
return It == Cache.end() ? nullptr : It->getValue().first;
|
|
}
|
|
|
|
/// Associates the given entry with the filename and returns the given entry
|
|
/// pointer (for convenience).
|
|
const CachedFileSystemEntry &
|
|
insertEntryForFilename(StringRef Filename,
|
|
const CachedFileSystemEntry &Entry) {
|
|
assert(llvm::sys::path::is_absolute_gnu(Filename));
|
|
auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
|
|
auto &[CachedEntry, CachedRealPath] = It->getValue();
|
|
if (!Inserted) {
|
|
// The file is already present in the local cache. If we got here, it only
|
|
// contains the real path. Let's make sure the entry is populated too.
|
|
assert((!CachedEntry && CachedRealPath) && "entry already present");
|
|
CachedEntry = &Entry;
|
|
}
|
|
return *CachedEntry;
|
|
}
|
|
|
|
/// Returns real path associated with the filename or nullptr if none is
|
|
/// found.
|
|
const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
|
|
assert(llvm::sys::path::is_absolute_gnu(Filename));
|
|
auto It = Cache.find(Filename);
|
|
return It == Cache.end() ? nullptr : It->getValue().second;
|
|
}
|
|
|
|
/// Associates the given real path with the filename and returns the given
|
|
/// entry pointer (for convenience).
|
|
const CachedRealPath &
|
|
insertRealPathForFilename(StringRef Filename,
|
|
const CachedRealPath &RealPath) {
|
|
assert(llvm::sys::path::is_absolute_gnu(Filename));
|
|
auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
|
|
auto &[CachedEntry, CachedRealPath] = It->getValue();
|
|
if (!Inserted) {
|
|
// The file is already present in the local cache. If we got here, it only
|
|
// contains the entry. Let's make sure the real path is populated too.
|
|
assert((!CachedRealPath && CachedEntry) && "real path already present");
|
|
CachedRealPath = &RealPath;
|
|
}
|
|
return *CachedRealPath;
|
|
}
|
|
};
|
|
|
|
/// Reference to a CachedFileSystemEntry.
|
|
/// If the underlying entry is an opened file, this wrapper returns the file
|
|
/// contents and the scanned preprocessor directives.
|
|
class EntryRef {
|
|
/// The filename used to access this entry.
|
|
std::string Filename;
|
|
|
|
/// The underlying cached entry.
|
|
const CachedFileSystemEntry &Entry;
|
|
|
|
friend class DependencyScanningWorkerFilesystem;
|
|
|
|
public:
|
|
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
|
|
: Filename(Name), Entry(Entry) {}
|
|
|
|
llvm::vfs::Status getStatus() const {
|
|
llvm::vfs::Status Stat = Entry.getStatus();
|
|
if (!Stat.isDirectory())
|
|
Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
|
|
return llvm::vfs::Status::copyWithNewName(Stat, Filename);
|
|
}
|
|
|
|
bool isError() const { return Entry.isError(); }
|
|
bool isDirectory() const { return Entry.isDirectory(); }
|
|
|
|
/// If the cached entry represents an error, promotes it into `ErrorOr`.
|
|
llvm::ErrorOr<EntryRef> unwrapError() const {
|
|
if (isError())
|
|
return Entry.getError();
|
|
return *this;
|
|
}
|
|
|
|
StringRef getContents() const { return Entry.getOriginalContents(); }
|
|
|
|
std::optional<ArrayRef<dependency_directives_scan::Directive>>
|
|
getDirectiveTokens() const {
|
|
return Entry.getDirectiveTokens();
|
|
}
|
|
};
|
|
|
|
/// A virtual file system optimized for the dependency discovery.
|
|
///
|
|
/// It is primarily designed to work with source files whose contents was
|
|
/// preprocessed to remove any tokens that are unlikely to affect the dependency
|
|
/// computation.
|
|
///
|
|
/// This is not a thread safe VFS. A single instance is meant to be used only in
|
|
/// one thread. Multiple instances are allowed to service multiple threads
|
|
/// running in parallel.
|
|
class DependencyScanningWorkerFilesystem
|
|
: public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
|
|
llvm::vfs::ProxyFileSystem> {
|
|
public:
|
|
static const char ID;
|
|
|
|
DependencyScanningWorkerFilesystem(
|
|
DependencyScanningFilesystemSharedCache &SharedCache,
|
|
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
|
|
|
|
llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
|
|
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
|
|
openFileForRead(const Twine &Path) override;
|
|
|
|
std::error_code getRealPath(const Twine &Path,
|
|
SmallVectorImpl<char> &Output) override;
|
|
|
|
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
|
|
|
|
/// Make it so that no paths bypass this VFS.
|
|
void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); }
|
|
/// Set the prefix for paths that should bypass this VFS and go straight to
|
|
/// the underlying VFS.
|
|
void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; }
|
|
|
|
/// Returns entry for the given filename.
|
|
///
|
|
/// Attempts to use the local and shared caches first, then falls back to
|
|
/// using the underlying filesystem.
|
|
llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
|
|
|
|
/// Ensure the directive tokens are populated for this file entry.
|
|
///
|
|
/// Returns true if the directive tokens are populated for this file entry,
|
|
/// false if not (i.e. this entry is not a file or its scan fails).
|
|
bool ensureDirectiveTokensArePopulated(EntryRef Entry);
|
|
|
|
/// Check whether \p Path exists. By default checks cached result of \c
|
|
/// status(), and falls back on FS if unable to do so.
|
|
bool exists(const Twine &Path) override;
|
|
|
|
private:
|
|
/// For a filename that's not yet associated with any entry in the caches,
|
|
/// uses the underlying filesystem to either look up the entry based in the
|
|
/// shared cache indexed by unique ID, or creates new entry from scratch.
|
|
/// \p FilenameForLookup will always be an absolute path, and different than
|
|
/// \p OriginalFilename if \p OriginalFilename is relative.
|
|
llvm::ErrorOr<const CachedFileSystemEntry &>
|
|
computeAndStoreResult(StringRef OriginalFilename,
|
|
StringRef FilenameForLookup);
|
|
|
|
/// Represents a filesystem entry that has been stat-ed (and potentially read)
|
|
/// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
|
|
struct TentativeEntry {
|
|
llvm::vfs::Status Status;
|
|
std::unique_ptr<llvm::MemoryBuffer> Contents;
|
|
|
|
TentativeEntry(llvm::vfs::Status Status,
|
|
std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
|
|
: Status(std::move(Status)), Contents(std::move(Contents)) {}
|
|
};
|
|
|
|
/// Reads file at the given path. Enforces consistency between the file size
|
|
/// in status and size of read contents.
|
|
llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
|
|
|
|
/// Returns entry associated with the unique ID of the given tentative entry
|
|
/// if there is some in the shared cache. Otherwise, constructs new one,
|
|
/// associates it with the unique ID and returns the result.
|
|
const CachedFileSystemEntry &
|
|
getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
|
|
|
|
/// Returns entry associated with the filename or nullptr if none is found.
|
|
///
|
|
/// Returns entry from local cache if there is some. Otherwise, if the entry
|
|
/// is found in the shared cache, writes it through the local cache and
|
|
/// returns it. Otherwise returns nullptr.
|
|
const CachedFileSystemEntry *
|
|
findEntryByFilenameWithWriteThrough(StringRef Filename);
|
|
|
|
/// Returns entry associated with the unique ID in the shared cache or nullptr
|
|
/// if none is found.
|
|
const CachedFileSystemEntry *
|
|
findSharedEntryByUID(llvm::vfs::Status Stat) const {
|
|
return SharedCache.getShardForUID(Stat.getUniqueID())
|
|
.findEntryByUID(Stat.getUniqueID());
|
|
}
|
|
|
|
/// Associates the given entry with the filename in the local cache and
|
|
/// returns it.
|
|
const CachedFileSystemEntry &
|
|
insertLocalEntryForFilename(StringRef Filename,
|
|
const CachedFileSystemEntry &Entry) {
|
|
return LocalCache.insertEntryForFilename(Filename, Entry);
|
|
}
|
|
|
|
/// Returns entry associated with the filename in the shared cache if there is
|
|
/// some. Otherwise, constructs new one with the given error code, associates
|
|
/// it with the filename and returns the result.
|
|
const CachedFileSystemEntry &
|
|
getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
|
|
return SharedCache.getShardForFilename(Filename)
|
|
.getOrEmplaceEntryForFilename(Filename, EC);
|
|
}
|
|
|
|
/// Returns entry associated with the filename in the shared cache if there is
|
|
/// some. Otherwise, associates the given entry with the filename and returns
|
|
/// it.
|
|
const CachedFileSystemEntry &
|
|
getOrInsertSharedEntryForFilename(StringRef Filename,
|
|
const CachedFileSystemEntry &Entry) {
|
|
return SharedCache.getShardForFilename(Filename)
|
|
.getOrInsertEntryForFilename(Filename, Entry);
|
|
}
|
|
|
|
void printImpl(raw_ostream &OS, PrintType Type,
|
|
unsigned IndentLevel) const override {
|
|
printIndent(OS, IndentLevel);
|
|
OS << "DependencyScanningFilesystem\n";
|
|
getUnderlyingFS().print(OS, Type, IndentLevel + 1);
|
|
}
|
|
|
|
/// Whether this path should bypass this VFS and go straight to the underlying
|
|
/// VFS.
|
|
bool shouldBypass(StringRef Path) const;
|
|
|
|
/// The global cache shared between worker threads.
|
|
DependencyScanningFilesystemSharedCache &SharedCache;
|
|
/// The local cache is used by the worker thread to cache file system queries
|
|
/// locally instead of querying the global cache every time.
|
|
DependencyScanningFilesystemLocalCache LocalCache;
|
|
|
|
/// Prefix of paths that should go straight to the underlying VFS.
|
|
std::optional<std::string> BypassedPathPrefix;
|
|
|
|
/// The working directory to use for making relative paths absolute before
|
|
/// using them for cache lookups.
|
|
llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
|
|
|
|
void updateWorkingDirForCacheLookup();
|
|
|
|
llvm::ErrorOr<StringRef>
|
|
tryGetFilenameForLookup(StringRef OriginalFilename,
|
|
llvm::SmallVectorImpl<char> &PathBuf) const;
|
|
};
|
|
|
|
} // end namespace dependencies
|
|
} // end namespace tooling
|
|
} // end namespace clang
|
|
|
|
#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
|