#ifndef LLVM_PROFILEDATA_MEMPROF_H_
#define LLVM_PROFILEDATA_MEMPROF_H_

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

#include <bitset>
#include <cstdint>
#include <optional>

namespace llvm {
namespace memprof {

struct MemProfRecord;

// The versions of the indexed MemProf format
enum IndexedVersion : uint64_t {
  // Version 0: This version didn't have a version field.
  Version0 = 0,
  // Version 1: Added a version field to the header.
  Version1 = 1,
  // Version 2: Added a call stack table.
  Version2 = 2,
  // Version 3: Added a radix tree for call stacks. Switched to linear IDs for
  // frames and call stacks.
  Version3 = 3,
};

constexpr uint64_t MinimumSupportedVersion = Version0;
constexpr uint64_t MaximumSupportedVersion = Version3;

// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);

enum class Meta : uint64_t {
  Start = 0,
#define MIBEntryDef(NameTag, Name, Type) NameTag,
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  Size
};

using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;

// Returns the full schema currently in use.
MemProfSchema getFullSchema();

// Returns the schema consisting of the fields used for hot cold memory
// hinting.
MemProfSchema getHotColdSchema();

// Holds the actual MemInfoBlock data with all fields. Contents may be read or
// written partially by providing an appropriate schema to the serialize and
// deserialize methods.
struct PortableMemInfoBlock {
  PortableMemInfoBlock() = default;
  explicit PortableMemInfoBlock(const MemInfoBlock &Block,
                                const MemProfSchema &IncomingSchema) {
    for (const Meta Id : IncomingSchema)
      Schema.set(llvm::to_underlying(Id));
#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  }

  PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
    deserialize(Schema, Ptr);
  }

  // Read the contents of \p Ptr based on the \p Schema to populate the
  // MemInfoBlock member.
  void deserialize(const MemProfSchema &IncomingSchema,
                   const unsigned char *Ptr) {
    using namespace support;

    Schema.reset();
    for (const Meta Id : IncomingSchema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, is the profile collected from "
                         "a newer version of the runtime?");
      }

      Schema.set(llvm::to_underlying(Id));
    }
  }

  // Write the contents of the MemInfoBlock based on the \p Schema provided to
  // the raw_ostream \p OS.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);
    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    LE.write<Type>(Name); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }
  }

  // Print out the contents of the MemInfoBlock in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      MemInfoBlock:\n";
#define MIBEntryDef(NameTag, Name, Type) \
  OS << "        " << #Name << ": " << Name << "\n";
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    if (AccessHistogramSize > 0) {
      OS << "        " << "AccessHistogramValues" << ":";
      for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
        OS << " " << ((uint64_t *)AccessHistogram)[I];
      }
      OS << "\n";
    }
  }

  // Return the schema, only for unit tests.
  std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const {
    return Schema;
  }

  // Define getters for each type which can be called by analyses.
#define MIBEntryDef(NameTag, Name, Type) \
  Type get##Name() const { \
    assert(Schema[llvm::to_underlying(Meta::Name)]); \
    return Name; \
  }
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef

  void clear() { *this = PortableMemInfoBlock(); }

  bool operator==(const PortableMemInfoBlock &Other) const {
    if (Other.Schema != Schema)
      return false;

#define MIBEntryDef(NameTag, Name, Type) \
  if (Schema[llvm::to_underlying(Meta::Name)] && \
      Other.get##Name() != get##Name()) \
    return false;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    return true;
  }

  bool operator!=(const PortableMemInfoBlock &Other) const {
    return !operator==(Other);
  }

  static size_t serializedSize(const MemProfSchema &Schema) {
    size_t Result = 0;

    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type) \
  case Meta::Name: { \
    Result += sizeof(Type); \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }

    return Result;
  }

private:
  // The set of available fields, indexed by Meta::Name.
  std::bitset<llvm::to_underlying(Meta::Size)> Schema;

#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
};

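// Example (an illustrative sketch; `MIB` is a hypothetical MemInfoBlock
// already filled in by the caller): round-trip a MemInfoBlock through the
// portable, schema-driven encoding.
//
//   MemProfSchema Schema = getFullSchema();
//   PortableMemInfoBlock Block(MIB, Schema);
//
//   std::string Buffer;
//   llvm::raw_string_ostream OS(Buffer);
//   Block.serialize(Schema, OS);
//
//   PortableMemInfoBlock Copy(
//       Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
//   assert(Copy == Block);
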
// A type representing the id generated by hashing the contents of the Frame.
using FrameId = uint64_t;
// A type representing the id to index into the frame array.
using LinearFrameId = uint32_t;
// Describes a call frame for a dynamic allocation context. The contents of
// the frame are populated by symbolizing the stack depot call frame from the
// compiler runtime.
struct Frame {
  // A uuid (uint64_t) identifying the function. It is obtained by
  // llvm::md5(FunctionName) which returns the lower 64 bits.
  GlobalValue::GUID Function;
  // The symbol name for the function. Only populated in the Frame by the
  // reader if requested during initialization. This field should not be
  // serialized.
  std::unique_ptr<std::string> SymbolName;
  // The source line offset of the call from the beginning of parent function.
  uint32_t LineOffset;
  // The source column number of the call to help distinguish multiple calls
  // on the same line.
  uint32_t Column;
  // Whether the current frame is inlined.
  bool IsInlineFrame;

  Frame(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
  }

  Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
      : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}

  bool operator==(const Frame &Other) const {
    // Ignore the SymbolName field to avoid a string compare. Comparing the
    // function hash serves the same purpose.
    return Other.Function == Function && Other.LineOffset == LineOffset &&
           Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
  }

  Frame &operator=(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
    return *this;
  }

  bool operator!=(const Frame &Other) const { return !operator==(Other); }

  bool hasSymbolName() const { return !!SymbolName; }

  StringRef getSymbolName() const {
    assert(hasSymbolName());
    return *SymbolName;
  }

  std::string getSymbolNameOr(StringRef Alt) const {
    return std::string(hasSymbolName() ? getSymbolName() : Alt);
  }

  // Write the contents of the frame to the ostream \p OS.
  void serialize(raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);

    // If the type of the GlobalValue::GUID changes, then we need to update
    // the reader and the writer.
    static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
                  "Expect GUID to be uint64_t.");
    LE.write<uint64_t>(Function);

    LE.write<uint32_t>(LineOffset);
    LE.write<uint32_t>(Column);
    LE.write<bool>(IsInlineFrame);
  }

  // Read a frame from char data which has been serialized as little endian.
  static Frame deserialize(const unsigned char *Ptr) {
    using namespace support;

    const uint64_t F =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint32_t L =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const uint32_t C =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
    return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
                 /*IsInlineFrame=*/I);
  }

  // Returns the size of the frame information.
  static constexpr size_t serializedSize() {
    return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
           sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
  }

  // Print the frame information in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      -\n"
       << "        Function: " << Function << "\n"
       << "        SymbolName: " << getSymbolNameOr("<None>") << "\n"
       << "        LineOffset: " << LineOffset << "\n"
       << "        Column: " << Column << "\n"
       << "        Inline: " << IsInlineFrame << "\n";
  }

  // Return a hash value based on the contents of the frame. Here we don't use
  // hashing from llvm ADT since we are going to persist the hash id; the hash
  // combine algorithm in ADT uses a new randomized seed each time.
  inline FrameId hash() const {
    auto HashCombine = [](auto Value, size_t Seed) {
      std::hash<decltype(Value)> Hasher;
      // The constant used below is the 64 bit representation of the fractional
      // part of the golden ratio. Used here for the randomness in its bit
      // pattern.
      return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
    };

    size_t Result = 0;
    Result ^= HashCombine(Function, Result);
    Result ^= HashCombine(LineOffset, Result);
    Result ^= HashCombine(Column, Result);
    Result ^= HashCombine(IsInlineFrame, Result);
    return static_cast<FrameId>(Result);
  }
};

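// Example (an illustrative sketch; the constructor arguments are arbitrary
// placeholder values): a Frame round-trips through serialize/deserialize, and
// its FrameId is derived purely from its contents via hash().
//
//   Frame F(/*Hash=*/0x1234, /*Off=*/5, /*Col=*/2, /*Inline=*/false);
//   FrameId Id = F.hash();
//
//   std::string Buffer;
//   llvm::raw_string_ostream OS(Buffer);
//   F.serialize(OS);
//   Frame Copy = Frame::deserialize(
//       reinterpret_cast<const unsigned char *>(Buffer.data()));
//   assert(Copy == F && Copy.hash() == Id);
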
// A type representing the index into the table of call stacks.
using CallStackId = uint64_t;

// A type representing the index into the call stack array.
using LinearCallStackId = uint32_t;

// Holds allocation information in a space efficient format where frames are
// represented using unique identifiers.
struct IndexedAllocationInfo {
  // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
  // order. Frame contents are stored out-of-line.
  // TODO: Remove once we fully transition to CSId.
  llvm::SmallVector<FrameId> CallStack;
  // Conceptually the same as above. We are going to keep both CallStack and
  // CallStackId while we are transitioning from CallStack to CallStackId.
  CallStackId CSId = 0;
  // The statistics obtained from the runtime for the allocation.
  PortableMemInfoBlock Info;

  IndexedAllocationInfo() = default;
  IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
                        const MemInfoBlock &MB,
                        const MemProfSchema &Schema = getFullSchema())
      : CallStack(CS), CSId(CSId), Info(MB, Schema) {}

  // Returns the size in bytes when this allocation info struct is serialized.
  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedAllocationInfo &Other) const {
    if (Other.Info != Info)
      return false;

    if (Other.CSId != CSId)
      return false;
    return true;
  }

  bool operator!=(const IndexedAllocationInfo &Other) const {
    return !operator==(Other);
  }
};

// Holds allocation information with frame contents inline. The type should
// be used for temporary in-memory instances.
struct AllocationInfo {
  // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
  std::vector<Frame> CallStack;
  // Same as IndexedAllocationInfo::Info.
  PortableMemInfoBlock Info;

  AllocationInfo() = default;
  AllocationInfo(
      const IndexedAllocationInfo &IndexedAI,
      llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
    for (const FrameId &Id : IndexedAI.CallStack) {
      CallStack.push_back(IdToFrameCallback(Id));
    }
    Info = IndexedAI.Info;
  }

  void printYAML(raw_ostream &OS) const {
    OS << "    -\n";
    OS << "      Callstack:\n";
    // TODO: Print out the frame on one line to make it easier for deep
    // callstacks once we have a test to check valid YAML is generated.
    for (const Frame &F : CallStack) {
      F.printYAML(OS);
    }
    Info.printYAML(OS);
  }
};

// Holds the memprof profile information for a function. The internal
// representation stores frame ids for efficiency. This representation should
// be used in the profile conversion and manipulation tools.
struct IndexedMemProfRecord {
  // Memory allocation sites in this function for which we have memory
  // profiling data.
  llvm::SmallVector<IndexedAllocationInfo> AllocSites;
  // Holds call sites in this function which are part of some memory
  // allocation context. We store this as a list of locations, each with its
  // list of inline locations in bottom-up order, i.e., from leaf to root. The
  // inline location list may include additional entries; users should pick
  // the last entry in the list with the same function GUID.
  llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
  // Conceptually the same as above. We are going to keep both CallSites and
  // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
  llvm::SmallVector<CallStackId> CallSiteIds;

  void clear() {
    AllocSites.clear();
    CallSites.clear();
  }

  void merge(const IndexedMemProfRecord &Other) {
    // TODO: Filter out duplicates which may occur if multiple memprof
    // profiles are merged together using llvm-profdata.
    AllocSites.append(Other.AllocSites);
    CallSites.append(Other.CallSites);
  }

  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedMemProfRecord &Other) const {
    if (Other.AllocSites != AllocSites)
      return false;

    if (Other.CallSiteIds != CallSiteIds)
      return false;
    return true;
  }

  // Serializes the memprof record to the ostream \p OS based on the schema
  // provided in \p Schema.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS,
                 IndexedVersion Version,
                 llvm::DenseMap<CallStackId, LinearCallStackId>
                     *MemProfCallStackIndexes = nullptr) const;

  // Deserializes memprof records from the Buffer.
  static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
                                          const unsigned char *Buffer,
                                          IndexedVersion Version);

  // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
  // translate CallStackId to call stacks with frames inline.
  MemProfRecord toMemProfRecord(
      llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;

  // Returns the GUID for the function name after canonicalization. For
  // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
  // mapped to functions using this GUID.
  static GlobalValue::GUID getGUID(const StringRef FunctionName);
};

// Holds the memprof profile information for a function. The internal
// representation stores frame contents inline. This representation should
// be used for a small number of temporary, in-memory instances.
struct MemProfRecord {
  // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
  llvm::SmallVector<AllocationInfo> AllocSites;
  // Same as IndexedMemProfRecord::CallSites with frame contents inline.
  llvm::SmallVector<std::vector<Frame>> CallSites;

  MemProfRecord() = default;
  MemProfRecord(
      const IndexedMemProfRecord &Record,
      llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
    for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
      AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
    }
    for (const ArrayRef<FrameId> Site : Record.CallSites) {
      std::vector<Frame> Frames;
      for (const FrameId Id : Site) {
        Frames.push_back(IdToFrameCallback(Id));
      }
      CallSites.push_back(Frames);
    }
  }

  // Prints out the contents of the memprof record in YAML.
  void print(llvm::raw_ostream &OS) const {
    if (!AllocSites.empty()) {
      OS << "    AllocSites:\n";
      for (const AllocationInfo &N : AllocSites)
        N.printYAML(OS);
    }

    if (!CallSites.empty()) {
      OS << "    CallSites:\n";
      for (const std::vector<Frame> &Frames : CallSites) {
        for (const Frame &F : Frames) {
          OS << "    -\n";
          F.printYAML(OS);
        }
      }
    }
  }
};

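// Example (an illustrative sketch; `IdToFrameMap` and `IndexedRecord` are
// hypothetical variables populated by a profile reader): expand an
// IndexedMemProfRecord into a MemProfRecord by looking each FrameId up in a
// FrameId-to-Frame map.
//
//   llvm::MapVector<FrameId, Frame> IdToFrameMap;
//   IndexedMemProfRecord IndexedRecord;
//   // ... populate IdToFrameMap and IndexedRecord ...
//   MemProfRecord Record(IndexedRecord, [&](const FrameId Id) -> const Frame {
//     return IdToFrameMap.lookup(Id);
//   });
//   Record.print(llvm::errs());
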
// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
// ids in the schema. Subsequent entries are integers which map to memprof::Meta
// enum class entries. After successfully reading the schema, the pointer points
// just past the schema contents.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);

// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
class RecordLookupTrait {
public:
  using data_type = const IndexedMemProfRecord &;
  using internal_key_type = uint64_t;
  using external_key_type = uint64_t;
  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  RecordLookupTrait() = delete;
  RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
      : Version(V), Schema(S) {}

  static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
  static uint64_t GetInternalKey(uint64_t K) { return K; }
  static uint64_t GetExternalKey(uint64_t K) { return K; }

  hash_value_type ComputeHash(uint64_t K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
    return Record;
  }

private:
  // Holds the MemProf version.
  IndexedVersion Version;
  // Holds the memprof schema used to deserialize records.
  MemProfSchema Schema;
  // Holds the records from one function deserialized from the indexed format.
  IndexedMemProfRecord Record;
};

// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
class RecordWriterTrait {
public:
  using key_type = uint64_t;
  using key_type_ref = uint64_t;

  using data_type = IndexedMemProfRecord;
  using data_type_ref = IndexedMemProfRecord &;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

private:
  // Pointer to the memprof schema to use for the generator.
  const MemProfSchema *Schema;
  // The MemProf version to use for the serialization.
  IndexedVersion Version;

  // Mappings from CallStackId to the indexes into the call stack array.
  llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;

public:
  // We do not support the default constructor, which does not set Version.
  RecordWriterTrait() = delete;
  RecordWriterTrait(
      const MemProfSchema *Schema, IndexedVersion V,
      llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
      : Schema(Schema), Version(V),
        MemProfCallStackIndexes(MemProfCallStackIndexes) {}

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize(*Schema, Version);
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<uint64_t>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    assert(Schema != nullptr && "MemProf schema is not initialized!");
    V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
    // Clear the IndexedMemProfRecord which results in clearing/freeing its
    // vectors of allocs and callsites. This is owned by the associated on-disk
    // hash table, but unused after this point. See also the comment added to
    // the client which constructs the on-disk hash table for this trait.
    V.clear();
  }
};

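// Example (an illustrative sketch, assuming the OnDiskChainedHashTableGenerator
// interface from llvm/Support/OnDiskHashTable.h; `Schema`, `Records`, and `OS`
// are hypothetical locals): emit a Version2 on-disk hash table of records.
//
//   RecordWriterTrait Trait(&Schema, Version2,
//                           /*MemProfCallStackIndexes=*/nullptr);
//   llvm::OnDiskChainedHashTableGenerator<RecordWriterTrait> Generator;
//   for (auto &[GUID, Record] : Records)
//     Generator.insert(GUID, Record, Trait);
//   uint64_t RecordTableOffset = Generator.Emit(OS, Trait);
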
// Trait for writing frame mappings to the on-disk hash table.
class FrameWriterTrait {
public:
  using key_type = FrameId;
  using key_type_ref = FrameId;

  using data_type = Frame;
  using data_type_ref = Frame &;

  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    V.serialize(Out);
  }
};

// Trait for reading frame mappings from the on-disk hash table.
class FrameLookupTrait {
public:
  using data_type = const Frame;
  using internal_key_type = FrameId;
  using external_key_type = FrameId;
  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    return Frame::deserialize(D);
  }
};

// Trait for writing call stacks to the on-disk hash table.
class CallStackWriterTrait {
public:
  using key_type = CallStackId;
  using key_type_ref = CallStackId;

  using data_type = llvm::SmallVector<FrameId>;
  using data_type_ref = llvm::SmallVector<FrameId> &;

  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // We do not explicitly emit the key length because it is a constant.
    offset_type N = sizeof(K);
    offset_type M = sizeof(FrameId) * V.size();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // Emit the frames. We do not explicitly emit the length of the vector
    // because it can be inferred from the data length.
    for (FrameId F : V)
      LE.write<FrameId>(F);
  }
};

// Trait for reading call stack mappings from the on-disk hash table.
class CallStackLookupTrait {
public:
  using data_type = const llvm::SmallVector<FrameId>;
  using internal_key_type = CallStackId;
  using external_key_type = CallStackId;
  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    // We do not explicitly read the key length because it is a constant.
    offset_type KeyLen = sizeof(external_key_type);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
    using namespace support;
    llvm::SmallVector<FrameId> CS;
    // Derive the number of frames from the data length.
    uint64_t NumFrames = Length / sizeof(FrameId);
    assert(Length % sizeof(FrameId) == 0);
    CS.reserve(NumFrames);
    for (size_t I = 0; I != NumFrames; ++I) {
      FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
      CS.push_back(F);
    }
    return CS;
  }
};

// Compute a CallStackId for a given call stack.
CallStackId hashCallStack(ArrayRef<FrameId> CS);

namespace detail {
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
// to do so in one of two different ways depending on the type of the hash
// table.
template <typename value_type, typename IterTy>
value_type DerefIterator(IterTy Iter) {
  using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
  if constexpr (std::is_same_v<deref_type, value_type>)
    return *Iter;
  else
    return Iter->second;
}
} // namespace detail

// A function object that returns a frame for a given FrameId.
template <typename MapTy> struct FrameIdConverter {
  std::optional<FrameId> LastUnmappedId;
  MapTy &Map;

  FrameIdConverter() = delete;
  FrameIdConverter(MapTy &Map) : Map(Map) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
  // while the original instance doesn't.
  FrameIdConverter(const FrameIdConverter &) = delete;
  FrameIdConverter &operator=(const FrameIdConverter &) = delete;

  Frame operator()(FrameId Id) {
    auto Iter = Map.find(Id);
    if (Iter == Map.end()) {
      LastUnmappedId = Id;
      return Frame(0, 0, 0, false);
    }
    return detail::DerefIterator<Frame>(Iter);
  }
};

// A function object that returns a call stack for a given CallStackId.
template <typename MapTy> struct CallStackIdConverter {
  std::optional<CallStackId> LastUnmappedId;
  MapTy &Map;
  llvm::function_ref<Frame(FrameId)> FrameIdToFrame;

  CallStackIdConverter() = delete;
  CallStackIdConverter(MapTy &Map,
                       llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
      : Map(Map), FrameIdToFrame(FrameIdToFrame) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of CallStackIdConverter gets an error in
  // LastUnmappedId while the original instance doesn't.
  CallStackIdConverter(const CallStackIdConverter &) = delete;
  CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;

  std::vector<Frame> operator()(CallStackId CSId) {
    std::vector<Frame> Frames;
    auto CSIter = Map.find(CSId);
    if (CSIter == Map.end()) {
      LastUnmappedId = CSId;
    } else {
      llvm::SmallVector<FrameId> CS =
          detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
      Frames.reserve(CS.size());
      for (FrameId Id : CS)
        Frames.push_back(FrameIdToFrame(Id));
    }
    return Frames;
  }
};

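// Example (an illustrative sketch; `MemProfData` is a hypothetical, fully
// populated IndexedMemProfData, a struct defined further below): convert
// IndexedMemProfRecords that reference call stacks by CallStackId into
// MemProfRecords with frame contents inline.
//
//   FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
//       MemProfData.Frames);
//   CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
//       MemProfData.CallStacks, FrameIdConv);
//   for (const auto &[GUID, IndexedRecord] : MemProfData.Records) {
//     MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
//     assert(!FrameIdConv.LastUnmappedId && !CSIdConv.LastUnmappedId);
//   }
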
// A function object that returns a Frame stored at a given index into the
// Frame array in the profile.
struct LinearFrameIdConverter {
  const unsigned char *FrameBase;

  LinearFrameIdConverter() = delete;
  LinearFrameIdConverter(const unsigned char *FrameBase)
      : FrameBase(FrameBase) {}

  Frame operator()(LinearFrameId LinearId) {
    uint64_t Offset =
        static_cast<uint64_t>(LinearId) * Frame::serializedSize();
    return Frame::deserialize(FrameBase + Offset);
  }
};

// A function object that returns a call stack stored at a given index into the
// call stack array in the profile.
struct LinearCallStackIdConverter {
  const unsigned char *CallStackBase;
  std::function<Frame(LinearFrameId)> FrameIdToFrame;

  LinearCallStackIdConverter() = delete;
  LinearCallStackIdConverter(const unsigned char *CallStackBase,
                             std::function<Frame(LinearFrameId)> FrameIdToFrame)
      : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}

  std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
    std::vector<Frame> Frames;

    const unsigned char *Ptr =
        CallStackBase +
        static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
    uint32_t NumFrames =
        support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    Frames.reserve(NumFrames);
    for (; NumFrames; --NumFrames) {
      LinearFrameId Elem =
          support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      // Follow a pointer to the parent, if any. See comments below on
      // CallStackRadixTreeBuilder for the description of the radix tree format.
      if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
        Ptr += (-Elem) * sizeof(LinearFrameId);
        Elem =
            support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      }
      // We shouldn't encounter another pointer.
      assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
      Frames.push_back(FrameIdToFrame(Elem));
      Ptr += sizeof(LinearFrameId);
    }

    return Frames;
  }
};

struct IndexedMemProfData {
  // A map to hold memprof data per function. The lower 64 bits obtained from
  // the md5 hash of the function name are used to index into the map.
  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records;

  // A map to hold frame id to frame mappings. The mappings are used to
  // convert IndexedMemProfRecord to MemProfRecords with frame information
  // inline.
  llvm::MapVector<FrameId, Frame> Frames;

  // A map to hold call stack id to call stacks.
  llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
};

struct FrameStat {
  // The number of occurrences of a given FrameId.
  uint64_t Count = 0;
  // The sum of indexes where a given FrameId shows up.
  uint64_t PositionSum = 0;
};

// Compute a histogram of Frames in call stacks.
llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData);

// Construct a radix tree of call stacks.
//
// A set of call stacks might look like:
//
// CallStackId 1:  f1 -> f2 -> f3
// CallStackId 2:  f1 -> f2 -> f4 -> f5
// CallStackId 3:  f1 -> f2 -> f4 -> f6
// CallStackId 4:  f7 -> f8 -> f9
//
// where each fn refers to a stack frame.
//
// Since we expect a lot of common prefixes, we can compress the call stacks
// into a radix tree like:
//
// CallStackId 1:  f1 -> f2 -> f3
//                       |
// CallStackId 2:        +---> f4 -> f5
//                       |
// CallStackId 3:        +---> f6
//
// CallStackId 4:  f7 -> f8 -> f9
//
// Now, we are interested in retrieving call stacks for a given CallStackId, so
// we just need a pointer from a given call stack to its parent. For example,
// CallStackId 2 would point to CallStackId 1 as a parent.
//
// We serialize the radix tree above into a single array along with the length
// of each call stack and pointers to the parent call stacks.
//
// Index:              0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
// Array:             L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
//                     ^           ^        ^           ^
//                     |           |        |           |
// CallStackId 4:  0 --+           |        |           |
// CallStackId 3:  4 --------------+        |           |
// CallStackId 2:  7 -----------------------+           |
// CallStackId 1: 11 -----------------------------------+
//
// - LN indicates the length of a call stack, encoded as ordinary integer N.
//
// - JN indicates a pointer to the parent, encoded as -N.
//
// The radix tree allows us to reconstruct call stacks in the leaf-to-root
// order as we scan the array from left to right while following pointers to
// parents along the way.
//
// For example, if we are decoding CallStackId 2, we start a forward traversal
// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
// picking up f2 and f1. We are done after collecting 4 frames as indicated at
// the beginning of the traversal.
//
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
// the radix tree array, so we do not explicitly encode mappings like:
// "CallStackId 1 -> 11".
class CallStackRadixTreeBuilder {
  // The radix tree array.
  std::vector<LinearFrameId> RadixArray;

  // Mapping from CallStackIds to indexes into RadixArray.
  llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;

  // In build, we partition a given call stack into two parts -- the prefix
  // that's common with the previously encoded call stack and the frames beyond
  // the common prefix -- the unique portion. Then we want to find out where
  // the common prefix is stored in RadixArray so that we can link the unique
  // portion to the common prefix. Indexes, declared below, serves this
  // purpose. Intuitively, Indexes tells us where each frame of the previously
  // encoded call stack is stored in RadixArray. More formally, Indexes
  // satisfies:
  //
  //   RadixArray[Indexes[I]] == Prev[I]
  //
  // for every I, where Prev is the call stack in the root-to-leaf order
  // previously encoded by build. (Note that Prev, as passed to
  // encodeCallStack, is in the leaf-to-root order.)
  //
  // For example, if the call stack being encoded shares 5 frames at the root
  // of the call stack with the previously encoded call stack,
  // RadixArray[Indexes[0]] is the root frame of the common prefix.
  // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
  std::vector<LinearCallStackId> Indexes;

  using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;

  // Encode a call stack into RadixArray. Return the starting index within
  // RadixArray.
  LinearCallStackId encodeCallStack(
      const llvm::SmallVector<FrameId> *CallStack,
      const llvm::SmallVector<FrameId> *Prev,
      const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);

public:
  CallStackRadixTreeBuilder() = default;

  // Build a radix tree array.
  void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                 &&MemProfCallStackData,
             const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
             llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);

  const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }

  llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
    return std::move(CallStackPos);
  }
};

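// Example (an illustrative sketch; `MemProfData` and `FrameIndexes` are
// hypothetical variables, with `FrameIndexes` mapping each FrameId to the
// LinearFrameId assigned when the frame array was laid out): build the radix
// tree array used by the Version3 format and take the per-call-stack indexes.
//
//   llvm::DenseMap<FrameId, FrameStat> FrameHistogram =
//       computeFrameHistogram(MemProfData.CallStacks);
//   CallStackRadixTreeBuilder Builder;
//   Builder.build(std::move(MemProfData.CallStacks), FrameIndexes,
//                 FrameHistogram);
//   const std::vector<LinearFrameId> &RadixArray = Builder.getRadixArray();
//   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackIndexes =
//       Builder.takeCallStackPos();
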
// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData);
} // namespace memprof
} // namespace llvm

#endif // LLVM_PROFILEDATA_MEMPROF_H_