#ifndef LLVM_PROFILEDATA_MEMPROF_H_
#define LLVM_PROFILEDATA_MEMPROF_H_

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

#include <bitset>
#include <cstdint>
#include <optional>

namespace llvm {
namespace memprof {

struct MemProfRecord;

// The versions of the indexed MemProf format
enum IndexedVersion : uint64_t {
  // Version 0: This version didn't have a version field.
  Version0 = 0,
  // Version 1: Added a version field to the header.
  Version1 = 1,
  // Version 2: Added a call stack table.
  Version2 = 2,
  // Version 3: Added a radix tree for call stacks. Switched to linear IDs for
  // frames and call stacks.
  Version3 = 3,
};

constexpr uint64_t MinimumSupportedVersion = Version0;
constexpr uint64_t MaximumSupportedVersion = Version3;

// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);

enum class Meta : uint64_t {
  Start = 0,
#define MIBEntryDef(NameTag, Name, Type) NameTag,
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  Size
};

using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;

// Returns the full schema currently in use.
MemProfSchema getFullSchema();

// Returns the schema consisting of the fields used for hot cold memory
// hinting.
MemProfSchema getHotColdSchema();

// Holds the actual MemInfoBlock data with all fields. Contents may be read or
// written partially by providing an appropriate schema to the serialize and
// deserialize methods.
struct PortableMemInfoBlock {
  PortableMemInfoBlock() = default;
  explicit PortableMemInfoBlock(const MemInfoBlock &Block,
                                const MemProfSchema &IncomingSchema) {
    for (const Meta Id : IncomingSchema)
      Schema.set(llvm::to_underlying(Id));
#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  }

  PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
    deserialize(Schema, Ptr);
  }

  // Read the contents of \p Ptr based on the \p Schema to populate the
  // MemInfoBlock member.
  void deserialize(const MemProfSchema &IncomingSchema,
                   const unsigned char *Ptr) {
    using namespace support;

    Schema.reset();
    for (const Meta Id : IncomingSchema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, is the profile collected from "
                         "a newer version of the runtime?");
      }

      Schema.set(llvm::to_underlying(Id));
    }
  }

  // Write the contents of the MemInfoBlock based on the \p Schema provided to
  // the raw_ostream \p OS.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);
    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    LE.write<Type>(Name); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }
  }

  // Print out the contents of the MemInfoBlock in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      MemInfoBlock:\n";
#define MIBEntryDef(NameTag, Name, Type) \
  OS << "        " << #Name << ": " << Name << "\n";
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    if (AccessHistogramSize > 0) {
      OS << "        " << "AccessHistogramValues" << ":";
      for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
        OS << " " << ((uint64_t *)AccessHistogram)[I];
      }
      OS << "\n";
    }
  }

  // Return the schema, only for unit tests.
  std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const {
    return Schema;
  }

  // Define getters for each type which can be called by analyses.
#define MIBEntryDef(NameTag, Name, Type) \
  Type get##Name() const { \
    assert(Schema[llvm::to_underlying(Meta::Name)]); \
    return Name; \
  }
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef

  void clear() { *this = PortableMemInfoBlock(); }

  bool operator==(const PortableMemInfoBlock &Other) const {
    if (Other.Schema != Schema)
      return false;

#define MIBEntryDef(NameTag, Name, Type) \
  if (Schema[llvm::to_underlying(Meta::Name)] && \
      Other.get##Name() != get##Name()) \
    return false;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    return true;
  }

  bool operator!=(const PortableMemInfoBlock &Other) const {
    return !operator==(Other);
  }

  static size_t serializedSize(const MemProfSchema &Schema) {
    size_t Result = 0;

    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    Result += sizeof(Type); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }

    return Result;
  }

private:
  // The set of available fields, indexed by Meta::Name.
  std::bitset<llvm::to_underlying(Meta::Size)> Schema;

#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
};

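// Example (an illustrative sketch; `MIB` is a hypothetical MemInfoBlock
// already filled in by the caller): round-trip a MemInfoBlock through the
// portable, schema-driven encoding.
//
//   MemProfSchema Schema = getFullSchema();
//   PortableMemInfoBlock Block(MIB, Schema);
//
//   std::string Buffer;
//   llvm::raw_string_ostream OS(Buffer);
//   Block.serialize(Schema, OS);
//
//   PortableMemInfoBlock Copy(
//       Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
//   assert(Copy == Block);
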
// A type representing the id generated by hashing the contents of the Frame.
using FrameId = uint64_t;
// A type representing the id to index into the frame array.
using LinearFrameId = uint32_t;
// Describes a call frame for a dynamic allocation context. The contents of
// the frame are populated by symbolizing the stack depot call frame from the
// compiler runtime.
struct Frame {
  // A uuid (uint64_t) identifying the function. It is obtained by
  // llvm::md5(FunctionName) which returns the lower 64 bits.
  GlobalValue::GUID Function;
  // The symbol name for the function. Only populated in the Frame by the
  // reader if requested during initialization. This field should not be
  // serialized.
  std::unique_ptr<std::string> SymbolName;
  // The source line offset of the call from the beginning of parent function.
  uint32_t LineOffset;
  // The source column number of the call to help distinguish multiple calls
  // on the same line.
  uint32_t Column;
  // Whether the current frame is inlined.
  bool IsInlineFrame;

  Frame(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
  }

  Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
      : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}

  bool operator==(const Frame &Other) const {
    // Ignore the SymbolName field to avoid a string compare. Comparing the
    // function hash serves the same purpose.
    return Other.Function == Function && Other.LineOffset == LineOffset &&
           Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
  }

  Frame &operator=(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
    return *this;
  }

  bool operator!=(const Frame &Other) const { return !operator==(Other); }

  bool hasSymbolName() const { return !!SymbolName; }

  StringRef getSymbolName() const {
    assert(hasSymbolName());
    return *SymbolName;
  }

  std::string getSymbolNameOr(StringRef Alt) const {
    return std::string(hasSymbolName() ? getSymbolName() : Alt);
  }

  // Write the contents of the frame to the ostream \p OS.
  void serialize(raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);

    // If the type of the GlobalValue::GUID changes, then we need to update
    // the reader and the writer.
    static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
                  "Expect GUID to be uint64_t.");
    LE.write<uint64_t>(Function);

    LE.write<uint32_t>(LineOffset);
    LE.write<uint32_t>(Column);
    LE.write<bool>(IsInlineFrame);
  }

  // Read a frame from char data which has been serialized as little endian.
  static Frame deserialize(const unsigned char *Ptr) {
    using namespace support;

    const uint64_t F =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint32_t L =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const uint32_t C =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
    return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
                 /*IsInlineFrame=*/I);
  }

  // Returns the size of the frame information.
  static constexpr size_t serializedSize() {
    return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
           sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
  }

  // Print the frame information in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      -\n"
       << "        Function: " << Function << "\n"
       << "        SymbolName: " << getSymbolNameOr("<None>") << "\n"
       << "        LineOffset: " << LineOffset << "\n"
       << "        Column: " << Column << "\n"
       << "        Inline: " << IsInlineFrame << "\n";
  }

  // Return a hash value based on the contents of the frame. Here we don't use
  // hashing from llvm ADT since we are going to persist the hash id; the hash
  // combine algorithm in ADT uses a new randomized seed each time.
  inline FrameId hash() const {
    auto HashCombine = [](auto Value, size_t Seed) {
      std::hash<decltype(Value)> Hasher;
      // The constant used below is the 64 bit representation of the fractional
      // part of the golden ratio. Used here for the randomness in its bit
      // pattern.
      return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
    };

    size_t Result = 0;
    Result ^= HashCombine(Function, Result);
    Result ^= HashCombine(LineOffset, Result);
    Result ^= HashCombine(Column, Result);
    Result ^= HashCombine(IsInlineFrame, Result);
    return static_cast<FrameId>(Result);
  }
};

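// Example (an illustrative sketch; the constructor arguments are arbitrary
// placeholder values): a Frame round-trips through serialize/deserialize, and
// its FrameId is derived purely from its contents via hash().
//
//   Frame F(/*Hash=*/0x1234, /*Off=*/5, /*Col=*/2, /*Inline=*/false);
//   FrameId Id = F.hash();
//
//   std::string Buffer;
//   llvm::raw_string_ostream OS(Buffer);
//   F.serialize(OS);
//   Frame Copy = Frame::deserialize(
//       reinterpret_cast<const unsigned char *>(Buffer.data()));
//   assert(Copy == F && Copy.hash() == Id);
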
// A type representing the index into the table of call stacks.
using CallStackId = uint64_t;

// A type representing the index into the call stack array.
using LinearCallStackId = uint32_t;

// Holds allocation information in a space efficient format where frames are
// represented using unique identifiers.
struct IndexedAllocationInfo {
  // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
  // order. Frame contents are stored out-of-line.
  // TODO: Remove once we fully transition to CSId.
  llvm::SmallVector<FrameId> CallStack;
  // Conceptually the same as above. We are going to keep both CallStack and
  // CallStackId while we are transitioning from CallStack to CallStackId.
  CallStackId CSId = 0;
  // The statistics obtained from the runtime for the allocation.
  PortableMemInfoBlock Info;

  IndexedAllocationInfo() = default;
  IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
                        const MemInfoBlock &MB,
                        const MemProfSchema &Schema = getFullSchema())
      : CallStack(CS), CSId(CSId), Info(MB, Schema) {}

  // Returns the size in bytes when this allocation info struct is serialized.
  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedAllocationInfo &Other) const {
    if (Other.Info != Info)
      return false;

    if (Other.CSId != CSId)
      return false;
    return true;
  }

  bool operator!=(const IndexedAllocationInfo &Other) const {
    return !operator==(Other);
  }
};

// Holds allocation information with frame contents inline. The type should
// be used for temporary in-memory instances.
struct AllocationInfo {
  // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
  std::vector<Frame> CallStack;
  // Same as IndexedAllocationInfo::Info.
  PortableMemInfoBlock Info;

  AllocationInfo() = default;
  AllocationInfo(
      const IndexedAllocationInfo &IndexedAI,
      llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
    for (const FrameId &Id : IndexedAI.CallStack) {
      CallStack.push_back(IdToFrameCallback(Id));
    }
    Info = IndexedAI.Info;
  }

  void printYAML(raw_ostream &OS) const {
    OS << "    -\n";
    OS << "      Callstack:\n";
    // TODO: Print out the frame on one line to make it easier for deep
    // callstacks once we have a test to check valid YAML is generated.
    for (const Frame &F : CallStack) {
      F.printYAML(OS);
    }
    Info.printYAML(OS);
  }
};

// Holds the memprof profile information for a function. The internal
// representation stores frame ids for efficiency. This representation should
// be used in the profile conversion and manipulation tools.
struct IndexedMemProfRecord {
  // Memory allocation sites in this function for which we have memory
  // profiling data.
  llvm::SmallVector<IndexedAllocationInfo> AllocSites;
  // Holds call sites in this function which are part of some memory
  // allocation context. We store this as a list of locations, each with its
  // list of inline locations in bottom-up order, i.e., from leaf to root. The
  // inline location list may include additional entries; users should pick
  // the last entry in the list with the same function GUID.
  llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
  // Conceptually the same as above. We are going to keep both CallSites and
  // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
  llvm::SmallVector<CallStackId> CallSiteIds;

  void clear() {
    AllocSites.clear();
    CallSites.clear();
  }

  void merge(const IndexedMemProfRecord &Other) {
    // TODO: Filter out duplicates which may occur if multiple memprof
    // profiles are merged together using llvm-profdata.
    AllocSites.append(Other.AllocSites);
    CallSites.append(Other.CallSites);
  }

  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedMemProfRecord &Other) const {
    if (Other.AllocSites != AllocSites)
      return false;

    if (Other.CallSiteIds != CallSiteIds)
      return false;
    return true;
  }

  // Serializes the memprof record to the ostream \p OS based on the schema
  // provided in \p Schema.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS,
                 IndexedVersion Version,
                 llvm::DenseMap<CallStackId, LinearCallStackId>
                     *MemProfCallStackIndexes = nullptr) const;

  // Deserializes memprof records from the Buffer.
  static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
                                          const unsigned char *Buffer,
                                          IndexedVersion Version);

  // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
  // translate CallStackId to call stacks with frames inline.
  MemProfRecord toMemProfRecord(
      llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;

  // Returns the GUID for the function name after canonicalization. For
  // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
  // mapped to functions using this GUID.
  static GlobalValue::GUID getGUID(const StringRef FunctionName);
};

// Holds the memprof profile information for a function. The internal
// representation stores frame contents inline. This representation should
// be used for a small number of temporary, in-memory instances.
struct MemProfRecord {
  // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
  llvm::SmallVector<AllocationInfo> AllocSites;
  // Same as IndexedMemProfRecord::CallSites with frame contents inline.
  llvm::SmallVector<std::vector<Frame>> CallSites;

  MemProfRecord() = default;
  MemProfRecord(
      const IndexedMemProfRecord &Record,
      llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
    for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
      AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
    }
    for (const ArrayRef<FrameId> Site : Record.CallSites) {
      std::vector<Frame> Frames;
      for (const FrameId Id : Site) {
        Frames.push_back(IdToFrameCallback(Id));
      }
      CallSites.push_back(Frames);
    }
  }

  // Prints out the contents of the memprof record in YAML.
  void print(llvm::raw_ostream &OS) const {
    if (!AllocSites.empty()) {
      OS << "    AllocSites:\n";
      for (const AllocationInfo &N : AllocSites)
        N.printYAML(OS);
    }

    if (!CallSites.empty()) {
      OS << "    CallSites:\n";
      for (const std::vector<Frame> &Frames : CallSites) {
        for (const Frame &F : Frames) {
          OS << "    -\n";
          F.printYAML(OS);
        }
      }
    }
  }
};

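// Example (an illustrative sketch; `IdToFrameMap` and `IndexedRecord` are
// hypothetical variables populated by a profile reader): expand an
// IndexedMemProfRecord into a MemProfRecord by looking each FrameId up in a
// FrameId-to-Frame map.
//
//   llvm::MapVector<FrameId, Frame> IdToFrameMap;
//   IndexedMemProfRecord IndexedRecord;
//   // ... populate IdToFrameMap and IndexedRecord ...
//   MemProfRecord Record(IndexedRecord, [&](const FrameId Id) -> const Frame {
//     return IdToFrameMap.lookup(Id);
//   });
//   Record.print(llvm::errs());
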
// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
// ids in the schema. Subsequent entries are integers which map to memprof::Meta
// enum class entries. After successfully reading the schema, the pointer points
// just past the schema contents.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);

// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
class RecordLookupTrait {
public:
  using data_type = const IndexedMemProfRecord &;
  using internal_key_type = uint64_t;
  using external_key_type = uint64_t;
  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  RecordLookupTrait() = delete;
  RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
      : Version(V), Schema(S) {}

  static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
  static uint64_t GetInternalKey(uint64_t K) { return K; }
  static uint64_t GetExternalKey(uint64_t K) { return K; }

  hash_value_type ComputeHash(uint64_t K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
    return Record;
  }

private:
  // Holds the MemProf version.
  IndexedVersion Version;
  // Holds the memprof schema used to deserialize records.
  MemProfSchema Schema;
  // Holds the records from one function deserialized from the indexed format.
  IndexedMemProfRecord Record;
};

// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
class RecordWriterTrait {
public:
  using key_type = uint64_t;
  using key_type_ref = uint64_t;

  using data_type = IndexedMemProfRecord;
  using data_type_ref = IndexedMemProfRecord &;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

private:
  // Pointer to the memprof schema to use for the generator.
  const MemProfSchema *Schema;
  // The MemProf version to use for the serialization.
  IndexedVersion Version;

  // Mappings from CallStackId to the indexes into the call stack array.
  llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;

public:
  // We do not support the default constructor, which does not set Version.
  RecordWriterTrait() = delete;
  RecordWriterTrait(
      const MemProfSchema *Schema, IndexedVersion V,
      llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
      : Schema(Schema), Version(V),
        MemProfCallStackIndexes(MemProfCallStackIndexes) {}

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize(*Schema, Version);
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<uint64_t>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    assert(Schema != nullptr && "MemProf schema is not initialized!");
    V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
    // Clear the IndexedMemProfRecord which results in clearing/freeing its
    // vectors of allocs and callsites. This is owned by the associated on-disk
    // hash table, but unused after this point. See also the comment added to
    // the client which constructs the on-disk hash table for this trait.
    V.clear();
  }
};

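// Example (an illustrative sketch, assuming the OnDiskChainedHashTableGenerator
// interface from llvm/Support/OnDiskHashTable.h; `Schema`, `Records`, and `OS`
// are hypothetical locals): emit a Version2 on-disk hash table of records.
//
//   RecordWriterTrait Trait(&Schema, Version2,
//                           /*MemProfCallStackIndexes=*/nullptr);
//   llvm::OnDiskChainedHashTableGenerator<RecordWriterTrait> Generator;
//   for (auto &[GUID, Record] : Records)
//     Generator.insert(GUID, Record, Trait);
//   uint64_t RecordTableOffset = Generator.Emit(OS, Trait);
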
// Trait for writing frame mappings to the on-disk hash table.
class FrameWriterTrait {
public:
  using key_type = FrameId;
  using key_type_ref = FrameId;

  using data_type = Frame;
  using data_type_ref = Frame &;

  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    V.serialize(Out);
  }
};

// Trait for reading frame mappings from the on-disk hash table.
class FrameLookupTrait {
public:
  using data_type = const Frame;
  using internal_key_type = FrameId;
  using external_key_type = FrameId;
  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    return Frame::deserialize(D);
  }
};

// Trait for writing call stacks to the on-disk hash table.
class CallStackWriterTrait {
public:
  using key_type = CallStackId;
  using key_type_ref = CallStackId;

  using data_type = llvm::SmallVector<FrameId>;
  using data_type_ref = llvm::SmallVector<FrameId> &;

  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // We do not explicitly emit the key length because it is a constant.
    offset_type N = sizeof(K);
    offset_type M = sizeof(FrameId) * V.size();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // Emit the frames. We do not explicitly emit the length of the vector
    // because it can be inferred from the data length.
    for (FrameId F : V)
      LE.write<FrameId>(F);
  }
};

// Trait for reading call stack mappings from the on-disk hash table.
class CallStackLookupTrait {
public:
  using data_type = const llvm::SmallVector<FrameId>;
  using internal_key_type = CallStackId;
  using external_key_type = CallStackId;
  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    // We do not explicitly read the key length because it is a constant.
    offset_type KeyLen = sizeof(external_key_type);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
    using namespace support;
    llvm::SmallVector<FrameId> CS;
    // Derive the number of frames from the data length.
    uint64_t NumFrames = Length / sizeof(FrameId);
    assert(Length % sizeof(FrameId) == 0);
    CS.reserve(NumFrames);
    for (size_t I = 0; I != NumFrames; ++I) {
      FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
      CS.push_back(F);
    }
    return CS;
  }
};

// Compute a CallStackId for a given call stack.
CallStackId hashCallStack(ArrayRef<FrameId> CS);

namespace detail {
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
// to do so in one of two different ways depending on the type of the hash
// table.
template <typename value_type, typename IterTy>
value_type DerefIterator(IterTy Iter) {
  using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
  if constexpr (std::is_same_v<deref_type, value_type>)
    return *Iter;
  else
    return Iter->second;
}
} // namespace detail

// A function object that returns a frame for a given FrameId.
template <typename MapTy> struct FrameIdConverter {
  std::optional<FrameId> LastUnmappedId;
  MapTy &Map;

  FrameIdConverter() = delete;
  FrameIdConverter(MapTy &Map) : Map(Map) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
  // while the original instance doesn't.
  FrameIdConverter(const FrameIdConverter &) = delete;
  FrameIdConverter &operator=(const FrameIdConverter &) = delete;

  Frame operator()(FrameId Id) {
    auto Iter = Map.find(Id);
    if (Iter == Map.end()) {
      LastUnmappedId = Id;
      return Frame(0, 0, 0, false);
    }
    return detail::DerefIterator<Frame>(Iter);
  }
};

// A function object that returns a call stack for a given CallStackId.
template <typename MapTy> struct CallStackIdConverter {
  std::optional<CallStackId> LastUnmappedId;
  MapTy &Map;
  llvm::function_ref<Frame(FrameId)> FrameIdToFrame;

  CallStackIdConverter() = delete;
  CallStackIdConverter(MapTy &Map,
                       llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
      : Map(Map), FrameIdToFrame(FrameIdToFrame) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of CallStackIdConverter gets an error in
  // LastUnmappedId while the original instance doesn't.
  CallStackIdConverter(const CallStackIdConverter &) = delete;
  CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;

  std::vector<Frame> operator()(CallStackId CSId) {
    std::vector<Frame> Frames;
    auto CSIter = Map.find(CSId);
    if (CSIter == Map.end()) {
      LastUnmappedId = CSId;
    } else {
      llvm::SmallVector<FrameId> CS =
          detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
      Frames.reserve(CS.size());
      for (FrameId Id : CS)
        Frames.push_back(FrameIdToFrame(Id));
    }
    return Frames;
  }
};

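// Example (an illustrative sketch; `MemProfData` is a hypothetical, fully
// populated IndexedMemProfData, a struct defined further below): convert
// IndexedMemProfRecords that reference call stacks by CallStackId into
// MemProfRecords with frame contents inline.
//
//   FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
//       MemProfData.Frames);
//   CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
//       MemProfData.CallStacks, FrameIdConv);
//   for (const auto &[GUID, IndexedRecord] : MemProfData.Records) {
//     MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
//     assert(!FrameIdConv.LastUnmappedId && !CSIdConv.LastUnmappedId);
//   }
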
// A function object that returns a Frame stored at a given index into the
// Frame array in the profile.
struct LinearFrameIdConverter {
  const unsigned char *FrameBase;

  LinearFrameIdConverter() = delete;
  LinearFrameIdConverter(const unsigned char *FrameBase)
      : FrameBase(FrameBase) {}

  Frame operator()(LinearFrameId LinearId) {
    uint64_t Offset =
        static_cast<uint64_t>(LinearId) * Frame::serializedSize();
    return Frame::deserialize(FrameBase + Offset);
  }
};

// A function object that returns a call stack stored at a given index into the
// call stack array in the profile.
struct LinearCallStackIdConverter {
  const unsigned char *CallStackBase;
  std::function<Frame(LinearFrameId)> FrameIdToFrame;

  LinearCallStackIdConverter() = delete;
  LinearCallStackIdConverter(const unsigned char *CallStackBase,
                             std::function<Frame(LinearFrameId)> FrameIdToFrame)
      : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}

  std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
    std::vector<Frame> Frames;

    const unsigned char *Ptr =
        CallStackBase +
        static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
    uint32_t NumFrames =
        support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    Frames.reserve(NumFrames);
    for (; NumFrames; --NumFrames) {
      LinearFrameId Elem =
          support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      // Follow a pointer to the parent, if any. See comments below on
      // CallStackRadixTreeBuilder for the description of the radix tree format.
      if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
        Ptr += (-Elem) * sizeof(LinearFrameId);
        Elem =
            support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      }
      // We shouldn't encounter another pointer.
      assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
      Frames.push_back(FrameIdToFrame(Elem));
      Ptr += sizeof(LinearFrameId);
    }

    return Frames;
  }
};

struct IndexedMemProfData {
  // A map to hold memprof data per function. The lower 64 bits obtained from
  // the md5 hash of the function name are used to index into the map.
  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records;

  // A map to hold frame id to frame mappings. The mappings are used to
  // convert IndexedMemProfRecord to MemProfRecords with frame information
  // inline.
  llvm::MapVector<FrameId, Frame> Frames;

  // A map to hold call stack id to call stacks.
  llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
};

struct FrameStat {
  // The number of occurrences of a given FrameId.
  uint64_t Count = 0;
  // The sum of indexes where a given FrameId shows up.
  uint64_t PositionSum = 0;
};

// Compute a histogram of Frames in call stacks.
llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData);

// Construct a radix tree of call stacks.
//
// A set of call stacks might look like:
//
// CallStackId 1:  f1 -> f2 -> f3
// CallStackId 2:  f1 -> f2 -> f4 -> f5
// CallStackId 3:  f1 -> f2 -> f4 -> f6
// CallStackId 4:  f7 -> f8 -> f9
//
// where each fn refers to a stack frame.
//
// Since we expect a lot of common prefixes, we can compress the call stacks
// into a radix tree like:
//
// CallStackId 1:  f1 -> f2 -> f3
//                       |
// CallStackId 2:        +---> f4 -> f5
//                       |
// CallStackId 3:        +---> f6
//
// CallStackId 4:  f7 -> f8 -> f9
//
// Now, we are interested in retrieving call stacks for a given CallStackId, so
// we just need a pointer from a given call stack to its parent. For example,
// CallStackId 2 would point to CallStackId 1 as a parent.
//
// We serialize the radix tree above into a single array along with the length
// of each call stack and pointers to the parent call stacks.
//
// Index:              0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
// Array:             L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
//                     ^           ^        ^           ^
//                     |           |        |           |
// CallStackId 4:  0 --+           |        |           |
// CallStackId 3:  4 --------------+        |           |
// CallStackId 2:  7 -----------------------+           |
// CallStackId 1: 11 -----------------------------------+
//
// - LN indicates the length of a call stack, encoded as ordinary integer N.
//
// - JN indicates a pointer to the parent, encoded as -N.
//
// The radix tree allows us to reconstruct call stacks in the leaf-to-root
// order as we scan the array from left to right while following pointers to
// parents along the way.
//
// For example, if we are decoding CallStackId 2, we start a forward traversal
// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
// picking up f2 and f1. We are done after collecting 4 frames as indicated at
// the beginning of the traversal.
//
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
// the radix tree array, so we do not explicitly encode mappings like:
// "CallStackId 1 -> 11".
class CallStackRadixTreeBuilder {
  // The radix tree array.
  std::vector<LinearFrameId> RadixArray;

  // Mapping from CallStackIds to indexes into RadixArray.
  llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;

  // In build, we partition a given call stack into two parts -- the prefix
  // that's common with the previously encoded call stack and the frames beyond
  // the common prefix -- the unique portion. Then we want to find out where
  // the common prefix is stored in RadixArray so that we can link the unique
  // portion to the common prefix. Indexes, declared below, serves this
  // purpose. Intuitively, Indexes tells us where each frame of the previously
  // encoded call stack is stored in RadixArray. More formally, Indexes
  // satisfies:
  //
  //   RadixArray[Indexes[I]] == Prev[I]
  //
  // for every I, where Prev is the call stack in the root-to-leaf order
  // previously encoded by build. (Note that Prev, as passed to
  // encodeCallStack, is in the leaf-to-root order.)
  //
  // For example, if the call stack being encoded shares 5 frames at the root
  // of the call stack with the previously encoded call stack,
  // RadixArray[Indexes[0]] is the root frame of the common prefix.
  // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
  std::vector<LinearCallStackId> Indexes;

  using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;

  // Encode a call stack into RadixArray. Return the starting index within
  // RadixArray.
  LinearCallStackId encodeCallStack(
      const llvm::SmallVector<FrameId> *CallStack,
      const llvm::SmallVector<FrameId> *Prev,
      const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);

public:
  CallStackRadixTreeBuilder() = default;

  // Build a radix tree array.
  void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                 &&MemProfCallStackData,
             const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
             llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);

  const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }

  llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
    return std::move(CallStackPos);
  }
};

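// Example (an illustrative sketch; `MemProfData` and `FrameIndexes` are
// hypothetical variables, with `FrameIndexes` mapping each FrameId to the
// LinearFrameId assigned when the frame array was laid out): build the radix
// tree array used by the Version3 format and take the per-call-stack indexes.
//
//   llvm::DenseMap<FrameId, FrameStat> FrameHistogram =
//       computeFrameHistogram(MemProfData.CallStacks);
//   CallStackRadixTreeBuilder Builder;
//   Builder.build(std::move(MemProfData.CallStacks), FrameIndexes,
//                 FrameHistogram);
//   const std::vector<LinearFrameId> &RadixArray = Builder.getRadixArray();
//   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackIndexes =
//       Builder.takeCallStackPos();
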
// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData);
} // namespace memprof
} // namespace llvm

#endif // LLVM_PROFILEDATA_MEMPROF_H_