Files
clang-r547379/include/llvm/ADT/GenericUniformityImpl.h
Ryan Prichard 6024e5c395 Update prebuilt Clang to r547379 (20.0.0).
clang 20.0.0 (based on r547379) from build 12806354.

Bug: http://b/379133546
Test: N/A
Change-Id: I2eb8938af55d809de674be63cb30cf27e801862b

Upstream-Commit: ad834e67b1105d15ef907f6255d4c96e8e733f57
2025-11-26 14:59:46 -05:00

1375 lines
47 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This template implementation resides in a separate file so that it
// does not get injected into every .cpp file that includes the
// generic header.
//
// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
//
// This file should only be included by files that implement a
// specialization of the relvant templates. Currently these are:
// - UniformityAnalysis.cpp
//
// Note: The DEBUG_TYPE macro should be defined before using this
// file so that any use of LLVM_DEBUG is associated with the
// including file rather than this file.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implementation of uniformity analysis.
///
/// The algorithm is a fixed point iteration that starts with the assumption
/// that all control flow and all values are uniform. Starting from sources of
/// divergence (whose discovery must be implemented by a CFG- or even
/// target-specific derived class), divergence of values is propagated from
/// definition to uses in a straight-forward way. The main complexity lies in
/// the propagation of the impact of divergent control flow on the divergence of
/// values (sync dependencies).
///
/// NOTE: In general, no interface exists for a transform to update
/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
/// transitive dependence, but it also does not provide an interface for
/// updating itself. Given that, transforms should not preserve uniformity in
/// their getAnalysisUsage() callback.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
#include "llvm/ADT/GenericUniformityInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "uniformity"
namespace llvm {
/// Construct a specially modified post-order traversal of cycles.
///
/// The ModifiedPO is contructed using a virtually modified CFG as follows:
///
/// 1. The successors of pre-entry nodes (predecessors of an cycle
/// entry that are outside the cycle) are replaced by the
/// successors of the successors of the header.
/// 2. Successors of the cycle header are replaced by the exit blocks
/// of the cycle.
///
/// Effectively, we produce a depth-first numbering with the following
/// properties:
///
/// 1. Nodes after a cycle are numbered earlier than the cycle header.
/// 2. The header is numbered earlier than the nodes in the cycle.
/// 3. The numbering of the nodes within the cycle forms an interval
/// starting with the header.
///
/// Effectively, the virtual modification arranges the nodes in a
/// cycle as a DAG with the header as the sole leaf, and successors of
/// the header as the roots. A reverse traversal of this numbering has
/// the following invariant on the unmodified original CFG:
///
/// Each node is visited after all its predecessors, except if that
/// predecessor is the cycle header.
///
template <typename ContextT> class ModifiedPostOrder {
public:
using BlockT = typename ContextT::BlockT;
using FunctionT = typename ContextT::FunctionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using CycleInfoT = GenericCycleInfo<ContextT>;
using CycleT = typename CycleInfoT::CycleT;
using const_iterator = typename std::vector<BlockT *>::const_iterator;
ModifiedPostOrder(const ContextT &C) : Context(C) {}
bool empty() const { return m_order.empty(); }
size_t size() const { return m_order.size(); }
void clear() { m_order.clear(); }
void compute(const CycleInfoT &CI);
unsigned count(BlockT *BB) const { return POIndex.count(BB); }
const BlockT *operator[](size_t idx) const { return m_order[idx]; }
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
POIndex[&BB] = m_order.size();
m_order.push_back(&BB);
LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
<< "): " << Context.print(&BB) << "\n");
if (isReducibleCycleHeader)
ReducibleCycleHeaders.insert(&BB);
}
unsigned getIndex(const BlockT *BB) const {
assert(POIndex.count(BB));
return POIndex.lookup(BB);
}
bool isReducibleCycleHeader(const BlockT *BB) const {
return ReducibleCycleHeaders.contains(BB);
}
private:
SmallVector<const BlockT *> m_order;
DenseMap<const BlockT *, unsigned> POIndex;
SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
const ContextT &Context;
void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
SmallPtrSetImpl<const BlockT *> &Finalized);
void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
const CycleInfoT &CI, const CycleT *Cycle,
SmallPtrSetImpl<const BlockT *> &Finalized);
};
template <typename> class DivergencePropagator;
/// \class GenericSyncDependenceAnalysis
///
/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
///
/// An analysis per divergent branch that returns the set of basic
/// blocks whose phi nodes become divergent due to divergent control.
/// These are the blocks that are reachable by two disjoint paths from
/// the branch, or cycle exits reachable along a path that is disjoint
/// from a path to the cycle latch.
// --- Above line is not a doxygen comment; intentionally left blank ---
//
// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
//
// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
// control-induced divergence in phi nodes.
//
// -- Reference --
// The algorithm is an extension of Section 5 of
//
// An abstract interpretation for SPMD divergence
// on reducible control flow graphs.
// Julian Rosemann, Simon Moll and Sebastian Hack
// POPL '21
//
//
// -- Sync dependence --
// Sync dependence characterizes the control flow aspect of the
// propagation of branch divergence. For example,
//
// %cond = icmp slt i32 %tid, 10
// br i1 %cond, label %then, label %else
// then:
// br label %merge
// else:
// br label %merge
// merge:
// %a = phi i32 [ 0, %then ], [ 1, %else ]
//
// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
// because %tid is not on its use-def chains, %a is sync dependent on %tid
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
//
// -- Reduction to SSA construction --
// There are two disjoint paths from A to X, if a certain variant of SSA
// construction places a phi node in X under the following set-up scheme.
//
// This variant of SSA construction ignores incoming undef values.
// That is paths from the entry without a definition do not result in
// phi nodes.
//
// entry
// / \
// A \
// / \ Y
// B C /
// \ / \ /
// D E
// \ /
// F
//
// Assume that A contains a divergent branch. We are interested
// in the set of all blocks where each block is reachable from A
// via two disjoint paths. This would be the set {D, F} in this
// case.
// To generally reduce this query to SSA construction we introduce
// a virtual variable x and assign to x different values in each
// successor block of A.
//
// entry
// / \
// A \
// / \ Y
// x = 0 x = 1 /
// \ / \ /
// D E
// \ /
// F
//
// Our flavor of SSA construction for x will construct the following
//
// entry
// / \
// A \
// / \ Y
// x0 = 0 x1 = 1 /
// \ / \ /
// x2 = phi E
// \ /
// x3 = phi
//
// The blocks D and F contain phi nodes and are thus each reachable
// by two disjoins paths from A.
//
// -- Remarks --
// * In case of cycle exits we need to check for temporal divergence.
// To this end, we check whether the definition of x differs between the
// cycle exit and the cycle header (_after_ SSA construction).
//
// * In the presence of irreducible control flow, the fixed point is
// reached only after multiple iterations. This is because labels
// reaching the header of a cycle must be repropagated through the
// cycle. This is true even in a reducible cycle, since the labels
// may have been produced by a nested irreducible cycle.
//
// * Note that SyncDependenceAnalysis is not concerned with the points
// of convergence in an irreducible cycle. It's only purpose is to
// identify join blocks. The "diverged entry" criterion is
// separately applied on join blocks to determine if an entire
// irreducible cycle is assumed to be divergent.
//
// * Relevant related work:
// A simple algorithm for global data flow analysis problems.
// Matthew S. Hecht and Jeffrey D. Ullman.
// SIAM Journal on Computing, 4(4):519532, December 1975.
//
template <typename ContextT> class GenericSyncDependenceAnalysis {
public:
using BlockT = typename ContextT::BlockT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using InstructionT = typename ContextT::InstructionT;
using CycleInfoT = GenericCycleInfo<ContextT>;
using CycleT = typename CycleInfoT::CycleT;
using ConstBlockSet = SmallPtrSet<const BlockT *, 4>;
using ModifiedPO = ModifiedPostOrder<ContextT>;
// * if BlockLabels[B] == C then C is the dominating definition at
// block B
// * if BlockLabels[B] == nullptr then we haven't seen B yet
// * if BlockLabels[B] == B then:
// - B is a join point of disjoint paths from X, or,
// - B is an immediate successor of X (initial value), or,
// - B is X
using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>;
/// Information discovered by the sync dependence analysis for each
/// divergent branch.
struct DivergenceDescriptor {
// Join points of diverged paths.
ConstBlockSet JoinDivBlocks;
// Divergent cycle exits
ConstBlockSet CycleDivBlocks;
// Labels assigned to blocks on diverged paths.
BlockLabelMap BlockLabels;
};
using DivergencePropagatorT = DivergencePropagator<ContextT>;
GenericSyncDependenceAnalysis(const ContextT &Context,
const DominatorTreeT &DT, const CycleInfoT &CI);
/// \brief Computes divergent join points and cycle exits caused by branch
/// divergence in \p Term.
///
/// This returns a pair of sets:
/// * The set of blocks which are reachable by disjoint paths from
/// \p Term.
/// * The set also contains cycle exits if there two disjoint paths:
/// one from \p Term to the cycle exit and another from \p Term to
/// the cycle header.
const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
private:
static DivergenceDescriptor EmptyDivergenceDesc;
ModifiedPO CyclePO;
const DominatorTreeT &DT;
const CycleInfoT &CI;
DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>>
CachedControlDivDescs;
};
/// \brief Analysis that identifies uniform values in a data-parallel
/// execution.
///
/// This analysis propagates divergence in a data-parallel context
/// from sources of divergence to all users. It can be instantiated
/// for an IR that provides a suitable SSAContext.
template <typename ContextT> class GenericUniformityAnalysisImpl {
public:
using BlockT = typename ContextT::BlockT;
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using ConstValueRefT = typename ContextT::ConstValueRefT;
using UseT = typename ContextT::UseT;
using InstructionT = typename ContextT::InstructionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using CycleInfoT = GenericCycleInfo<ContextT>;
using CycleT = typename CycleInfoT::CycleT;
using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
using DivergenceDescriptorT =
typename SyncDependenceAnalysisT::DivergenceDescriptor;
using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
const TargetTransformInfo *TTI)
: Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
void initialize();
const FunctionT &getFunction() const { return F; }
/// \brief Mark \p UniVal as a value that is always uniform.
void addUniformOverride(const InstructionT &Instr);
/// \brief Examine \p I for divergent outputs and add to the worklist.
void markDivergent(const InstructionT &I);
/// \brief Mark \p DivVal as a divergent value.
/// \returns Whether the tracked divergence state of \p DivVal changed.
bool markDivergent(ConstValueRefT DivVal);
/// \brief Mark outputs of \p Instr as divergent.
/// \returns Whether the tracked divergence state of any output has changed.
bool markDefsDivergent(const InstructionT &Instr);
/// \brief Propagate divergence to all instructions in the region.
/// Divergence is seeded by calls to \p markDivergent.
void compute();
/// \brief Whether any value was marked or analyzed to be divergent.
bool hasDivergence() const { return !DivergentValues.empty(); }
/// \brief Whether \p Val will always return a uniform value regardless of its
/// operands
bool isAlwaysUniform(const InstructionT &Instr) const;
bool hasDivergentDefs(const InstructionT &I) const;
bool isDivergent(const InstructionT &I) const {
if (I.isTerminator()) {
return DivergentTermBlocks.contains(I.getParent());
}
return hasDivergentDefs(I);
};
/// \brief Whether \p Val is divergent at its definition.
bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
bool isDivergentUse(const UseT &U) const;
bool hasDivergentTerminator(const BlockT &B) const {
return DivergentTermBlocks.contains(&B);
}
void print(raw_ostream &out) const;
protected:
/// \brief Value/block pair representing a single phi input.
struct PhiInput {
ConstValueRefT value;
BlockT *predBlock;
PhiInput(ConstValueRefT value, BlockT *predBlock)
: value(value), predBlock(predBlock) {}
};
const ContextT &Context;
const FunctionT &F;
const CycleInfoT &CI;
const TargetTransformInfo *TTI = nullptr;
// Detected/marked divergent values.
DenseSet<ConstValueRefT> DivergentValues;
SmallPtrSet<const BlockT *, 32> DivergentTermBlocks;
// Internal worklist for divergence propagation.
std::vector<const InstructionT *> Worklist;
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
void analyzeControlDivergence(const InstructionT &Term);
private:
const DominatorTreeT &DT;
// Recognized cycles with divergent exits.
SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
// Cycles assumed to be divergent.
//
// We don't use a set here because every insertion needs an explicit
// traversal of all existing members.
SmallVector<const CycleT *> AssumedDivergent;
// The SDA links divergent branches to divergent control-flow joins.
SyncDependenceAnalysisT SDA;
// Set of known-uniform values.
SmallPtrSet<const InstructionT *, 32> UniformOverrides;
/// \brief Mark all nodes in \p JoinBlock as divergent and push them on
/// the worklist.
void taintAndPushAllDefs(const BlockT &JoinBlock);
/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
/// the worklist.
void taintAndPushPhiNodes(const BlockT &JoinBlock);
/// \brief Identify all Instructions that become divergent because \p DivExit
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as
/// divergent and push them on the worklist.
void propagateCycleExitDivergence(const BlockT &DivExit,
const CycleT &DivCycle);
/// Mark as divergent all external uses of values defined in \p DefCycle.
void analyzeCycleExitDivergence(const CycleT &DefCycle);
/// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
void propagateTemporalDivergence(const InstructionT &I,
const CycleT &DefCycle);
/// \brief Push all users of \p Val (in the region) to the worklist.
void pushUsers(const InstructionT &I);
void pushUsers(ConstValueRefT V);
bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
/// \brief Whether \p Def is divergent when read in \p ObservingBlock.
bool isTemporalDivergent(const BlockT &ObservingBlock,
const InstructionT &Def) const;
};
template <typename ImplT>
void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {
delete Impl;
}
/// Compute divergence starting with a divergent branch.
template <typename ContextT> class DivergencePropagator {
public:
using BlockT = typename ContextT::BlockT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using CycleInfoT = GenericCycleInfo<ContextT>;
using CycleT = typename CycleInfoT::CycleT;
using ModifiedPO = ModifiedPostOrder<ContextT>;
using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
using DivergenceDescriptorT =
typename SyncDependenceAnalysisT::DivergenceDescriptor;
using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
const ModifiedPO &CyclePOT;
const DominatorTreeT &DT;
const CycleInfoT &CI;
const BlockT &DivTermBlock;
const ContextT &Context;
// Track blocks that receive a new label. Every time we relabel a
// cycle header, we another pass over the modified post-order in
// order to propagate the header label. The bit vector also allows
// us to skip labels that have not changed.
SparseBitVector<> FreshLabels;
// divergent join and cycle exit descriptor.
std::unique_ptr<DivergenceDescriptorT> DivDesc;
BlockLabelMapT &BlockLabels;
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT,
const CycleInfoT &CI, const BlockT &DivTermBlock)
: CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock),
Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
BlockLabels(DivDesc->BlockLabels) {}
void printDefs(raw_ostream &Out) {
Out << "Propagator::BlockLabels {\n";
for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
const auto *Block = CyclePOT[BlockIdx];
const auto *Label = BlockLabels[Block];
Out << Context.print(Block) << "(" << BlockIdx << ") : ";
if (!Label) {
Out << "<null>\n";
} else {
Out << Context.print(Label) << "\n";
}
}
Out << "}\n";
}
// Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
// causes a divergent join.
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
const auto *OldLabel = BlockLabels[&SuccBlock];
LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
<< "\tpushed label: " << Context.print(&PushedLabel)
<< "\n"
<< "\told label: " << Context.print(OldLabel) << "\n");
// Early exit if there is no change in the label.
if (OldLabel == &PushedLabel)
return false;
if (OldLabel != &SuccBlock) {
auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
// Assigning a new label, mark this in FreshLabels.
LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
FreshLabels.set(SuccIdx);
}
// This is not a join if the succ was previously unlabeled.
if (!OldLabel) {
LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
<< "\n");
BlockLabels[&SuccBlock] = &PushedLabel;
return false;
}
// This is a new join. Label the join block as itself, and not as
// the pushed label.
LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
BlockLabels[&SuccBlock] = &SuccBlock;
return true;
}
// visiting a virtual cycle exit edge from the cycle header --> temporal
// divergence on join
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
if (!computeJoin(ExitBlock, Label))
return false;
// Identified a divergent cycle exit
DivDesc->CycleDivBlocks.insert(&ExitBlock);
LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
<< "\n");
return true;
}
// process \p SuccBlock with reaching definition \p Label
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
if (!computeJoin(SuccBlock, Label))
return false;
// Divergent, disjoint paths join.
DivDesc->JoinDivBlocks.insert(&SuccBlock);
LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
<< "\n");
return true;
}
std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
assert(DivDesc);
LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
<< Context.print(&DivTermBlock) << "\n");
// Early stopping criterion
int FloorIdx = CyclePOT.size() - 1;
const BlockT *FloorLabel = nullptr;
int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
// Bootstrap with branch targets
auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
for (const auto *SuccBlock : successors(&DivTermBlock)) {
if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
// If DivTerm exits the cycle immediately, computeJoin() might
// not reach SuccBlock with a different label. We need to
// check for this exit now.
DivDesc->CycleDivBlocks.insert(SuccBlock);
LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
<< Context.print(SuccBlock) << "\n");
}
auto SuccIdx = CyclePOT.getIndex(SuccBlock);
visitEdge(*SuccBlock, *SuccBlock);
FloorIdx = std::min<int>(FloorIdx, SuccIdx);
}
while (true) {
auto BlockIdx = FreshLabels.find_last();
if (BlockIdx == -1 || BlockIdx < FloorIdx)
break;
LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
FreshLabels.reset(BlockIdx);
if (BlockIdx == DivTermIdx) {
LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
continue;
}
const auto *Block = CyclePOT[BlockIdx];
LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
<< BlockIdx << "\n");
const auto *Label = BlockLabels[Block];
assert(Label);
bool CausedJoin = false;
int LoweredFloorIdx = FloorIdx;
// If the current block is the header of a reducible cycle that
// contains the divergent branch, then the label should be
// propagated to the cycle exits. Such a header is the "last
// possible join" of any disjoint paths within this cycle. This
// prevents detection of spurious joins at the entries of any
// irreducible child cycles.
//
// This conclusion about the header is true for any choice of DFS:
//
// If some DFS has a reducible cycle C with header H, then for
// any other DFS, H is the header of a cycle C' that is a
// superset of C. For a divergent branch inside the subgraph
// C, any join node inside C is either H, or some node
// encountered without passing through H.
//
auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
if (!CyclePOT.isReducibleCycleHeader(Block))
return nullptr;
const auto *BlockCycle = CI.getCycle(Block);
if (BlockCycle->contains(&DivTermBlock))
return BlockCycle;
return nullptr;
};
if (const auto *BlockCycle = getReducibleParent(Block)) {
SmallVector<BlockT *, 4> BlockCycleExits;
BlockCycle->getExitBlocks(BlockCycleExits);
for (auto *BlockCycleExit : BlockCycleExits) {
CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label);
LoweredFloorIdx =
std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit));
}
} else {
for (const auto *SuccBlock : successors(Block)) {
CausedJoin |= visitEdge(*SuccBlock, *Label);
LoweredFloorIdx =
std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock));
}
}
// Floor update
if (CausedJoin) {
// 1. Different labels pushed to successors
FloorIdx = LoweredFloorIdx;
} else if (FloorLabel != Label) {
// 2. No join caused BUT we pushed a label that is different than the
// last pushed label
FloorIdx = LoweredFloorIdx;
FloorLabel = Label;
}
}
LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
// Check every cycle containing DivTermBlock for exit divergence.
// A cycle has exit divergence if the label of an exit block does
// not match the label of its header.
for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
Cycle = Cycle->getParentCycle()) {
if (Cycle->isReducible()) {
// The exit divergence of a reducible cycle is recorded while
// propagating labels.
continue;
}
SmallVector<BlockT *> Exits;
Cycle->getExitBlocks(Exits);
auto *Header = Cycle->getHeader();
auto *HeaderLabel = BlockLabels[Header];
for (const auto *Exit : Exits) {
if (BlockLabels[Exit] != HeaderLabel) {
// Identified a divergent cycle exit
DivDesc->CycleDivBlocks.insert(Exit);
LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
<< "\n");
}
}
}
return std::move(DivDesc);
}
};
template <typename ContextT>
typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
template <typename ContextT>
llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
: CyclePO(Context), DT(DT), CI(CI) {
CyclePO.compute(CI);
}
template <typename ContextT>
auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
// trivial case
if (succ_size(DivTermBlock) <= 1) {
return EmptyDivergenceDesc;
}
// already available in cache?
auto ItCached = CachedControlDivDescs.find(DivTermBlock);
if (ItCached != CachedControlDivDescs.end())
return *ItCached->second;
// compute all join points
DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
auto DivDesc = Propagator.computeJoinPoints();
auto printBlockSet = [&](ConstBlockSet &Blocks) {
return Printable([&](raw_ostream &Out) {
Out << "[";
ListSeparator LS;
for (const auto *BB : Blocks) {
Out << LS << CI.getSSAContext().print(BB);
}
Out << "]\n";
});
};
LLVM_DEBUG(
dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
<< "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
<< " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
<< "\n");
(void)printBlockSet;
auto ItInserted =
CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
assert(ItInserted.second);
return *ItInserted.first->second;
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
const InstructionT &I) {
if (isAlwaysUniform(I))
return;
bool Marked = false;
if (I.isTerminator()) {
Marked = DivergentTermBlocks.insert(I.getParent()).second;
if (Marked) {
LLVM_DEBUG(dbgs() << "marked divergent term block: "
<< Context.print(I.getParent()) << "\n");
}
} else {
Marked = markDefsDivergent(I);
}
if (Marked)
Worklist.push_back(&I);
}
template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
ConstValueRefT Val) {
if (DivergentValues.insert(Val).second) {
LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
return true;
}
return false;
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
const InstructionT &Instr) {
UniformOverrides.insert(&Instr);
}
// Mark as divergent all external uses of values defined in \p DefCycle.
//
// A value V defined by a block B inside \p DefCycle may be used outside the
// cycle only if the use is a PHI in some exit block, or B dominates some exit
// block. Thus, we check uses as follows:
//
// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
// - For every block B inside \p DefCycle that dominates at least one exit
// block, check all uses outside \p DefCycle.
//
// FIXME: This function does not distinguish between divergent and uniform
// exits. For each divergent exit, only the values that are live at that exit
// need to be propagated as divergent at their use outside the cycle.
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
const CycleT &DefCycle) {
SmallVector<BlockT *> Exits;
DefCycle.getExitBlocks(Exits);
for (auto *Exit : Exits) {
for (auto &Phi : Exit->phis()) {
if (usesValueFromCycle(Phi, DefCycle)) {
markDivergent(Phi);
}
}
}
for (auto *BB : DefCycle.blocks()) {
if (!llvm::any_of(Exits,
[&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
continue;
for (auto &II : *BB) {
propagateTemporalDivergence(II, DefCycle);
}
}
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
const BlockT &DivExit, const CycleT &InnerDivCycle) {
LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
<< "\n");
auto *DivCycle = &InnerDivCycle;
auto *OuterDivCycle = DivCycle;
auto *ExitLevelCycle = CI.getCycle(&DivExit);
const unsigned CycleExitDepth =
ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
// Find outer-most cycle that does not contain \p DivExit
while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
LLVM_DEBUG(dbgs() << " Found exiting cycle: "
<< Context.print(DivCycle->getHeader()) << "\n");
OuterDivCycle = DivCycle;
DivCycle = DivCycle->getParentCycle();
}
LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
<< Context.print(OuterDivCycle->getHeader()) << "\n");
if (!DivergentExitCycles.insert(OuterDivCycle).second)
return;
// Exit divergence does not matter if the cycle itself is assumed to
// be divergent.
for (const auto *C : AssumedDivergent) {
if (C->contains(OuterDivCycle))
return;
}
analyzeCycleExitDivergence(*OuterDivCycle);
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
const BlockT &BB) {
LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
for (const auto &I : instrs(BB)) {
// Terminators do not produce values; they are divergent only if
// the condition is divergent. That is handled when the divergent
// condition is placed in the worklist.
if (I.isTerminator())
break;
markDivergent(I);
}
}
/// Mark divergent phi nodes in a join block
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
const BlockT &JoinBlock) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
<< "\n");
for (const auto &Phi : JoinBlock.phis()) {
// FIXME: The non-undef value is not constant per se; it just happens to be
// uniform and may not dominate this PHI. So assuming that the same value
// reaches along all incoming edges may itself be undefined behaviour. This
// particular interpretation of the undef value was added to
// DivergenceAnalysis in the following review:
//
// https://reviews.llvm.org/D19013
if (ContextT::isConstantOrUndefValuePhi(Phi))
continue;
markDivergent(Phi);
}
}
/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
///
/// \return true iff \p Candidate was added to \p Cycles.
template <typename CycleT>
static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
CycleT *Candidate) {
if (llvm::any_of(Cycles,
[Candidate](CycleT *C) { return C->contains(Candidate); }))
return false;
Cycles.push_back(Candidate);
return true;
}
/// Return the outermost cycle made divergent by branch outside it.
///
/// If two paths that diverged outside an irreducible cycle join
/// inside that cycle, then that whole cycle is assumed to be
/// divergent. This does not apply if the cycle is reducible.
template <typename CycleT, typename BlockT>
static const CycleT *getExtDivCycle(const CycleT *Cycle,
const BlockT *DivTermBlock,
const BlockT *JoinBlock) {
assert(Cycle);
assert(Cycle->contains(JoinBlock));
if (Cycle->contains(DivTermBlock))
return nullptr;
const auto *OriginalCycle = Cycle;
const auto *Parent = Cycle->getParentCycle();
while (Parent && !Parent->contains(DivTermBlock)) {
Cycle = Parent;
Parent = Cycle->getParentCycle();
}
// If the original cycle is not the outermost cycle, then the outermost cycle
// is irreducible. If the outermost cycle were reducible, then external
// diverged paths would not reach the original inner cycle.
(void)OriginalCycle;
assert(Cycle == OriginalCycle || !Cycle->isReducible());
if (Cycle->isReducible()) {
assert(Cycle->getHeader() == JoinBlock);
return nullptr;
}
LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
return Cycle;
}
/// Return the outermost cycle made divergent by branch inside it.
///
/// This checks the "diverged entry" criterion defined in the
/// docs/ConvergenceAnalysis.html.
template <typename ContextT, typename CycleT, typename BlockT,
typename DominatorTreeT>
static const CycleT *
getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
const BlockT *JoinBlock, const DominatorTreeT &DT,
ContextT &Context) {
LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
<< " for internal branch " << Context.print(DivTermBlock)
<< "\n");
if (DT.properlyDominates(DivTermBlock, JoinBlock))
return nullptr;
// Find the smallest common cycle, if one exists.
assert(Cycle && Cycle->contains(JoinBlock));
while (Cycle && !Cycle->contains(DivTermBlock)) {
Cycle = Cycle->getParentCycle();
}
if (!Cycle || Cycle->isReducible())
return nullptr;
if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
return nullptr;
LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
<< " does not dominate join\n");
const auto *Parent = Cycle->getParentCycle();
while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
<< " does not dominate join\n");
Cycle = Parent;
Parent = Parent->getParentCycle();
}
LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
return Cycle;
}
template <typename ContextT, typename CycleT, typename BlockT,
typename DominatorTreeT>
static const CycleT *
getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
const BlockT *JoinBlock, const DominatorTreeT &DT,
ContextT &Context) {
if (!Cycle)
return nullptr;
// First try to expand Cycle to the largest that contains JoinBlock
// but not DivTermBlock.
const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
// Continue expanding to the largest cycle that contains both.
const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
if (Int)
return Int;
return Ext;
}
template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
const BlockT &ObservingBlock, const InstructionT &Def) const {
const BlockT *DefBlock = Def.getParent();
for (const CycleT *Cycle = CI.getCycle(DefBlock);
Cycle && !Cycle->contains(&ObservingBlock);
Cycle = Cycle->getParentCycle()) {
if (DivergentExitCycles.contains(Cycle)) {
return true;
}
}
return false;
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
const InstructionT &Term) {
const auto *DivTermBlock = Term.getParent();
DivergentTermBlocks.insert(DivTermBlock);
LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
<< "\n");
// Don't propagate divergence from unreachable blocks.
if (!DT.isReachableFromEntry(DivTermBlock))
return;
const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
SmallVector<const CycleT *> DivCycles;
// Iterate over all blocks now reachable by a disjoint path join
for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
const auto *Cycle = CI.getCycle(JoinBlock);
LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
<< "\n");
if (const auto *Outermost = getOutermostDivergentCycle(
Cycle, DivTermBlock, JoinBlock, DT, Context)) {
LLVM_DEBUG(dbgs() << "found divergent cycle\n");
DivCycles.push_back(Outermost);
continue;
}
taintAndPushPhiNodes(*JoinBlock);
}
// Sort by order of decreasing depth. This allows later cycles to be skipped
// because they are already contained in earlier ones.
llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
return A->getDepth() > B->getDepth();
});
// Cycles that are assumed divergent due to the diverged entry
// criterion potentially contain temporal divergence depending on
// the DFS chosen. Conservatively, all values produced in such a
// cycle are assumed divergent. "Cycle invariant" values may be
// assumed uniform, but that requires further analysis.
for (auto *C : DivCycles) {
if (!insertIfNotContained(AssumedDivergent, C))
continue;
LLVM_DEBUG(dbgs() << "process divergent cycle\n");
for (const BlockT *BB : C->blocks()) {
taintAndPushAllDefs(*BB);
}
}
const auto *BranchCycle = CI.getCycle(DivTermBlock);
assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
}
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::compute() {
// Initialize worklist.
auto DivValuesCopy = DivergentValues;
for (const auto DivVal : DivValuesCopy) {
assert(isDivergent(DivVal) && "Worklist invariant violated!");
pushUsers(DivVal);
}
// All values on the Worklist are divergent.
// Their users may not have been updated yet.
while (!Worklist.empty()) {
const InstructionT *I = Worklist.back();
Worklist.pop_back();
LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
if (I->isTerminator()) {
analyzeControlDivergence(*I);
continue;
}
// propagate value divergence to users
assert(isDivergent(*I) && "Worklist invariant violated!");
pushUsers(*I);
}
}
template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
const InstructionT &Instr) const {
return UniformOverrides.contains(&Instr);
}
template <typename ContextT>
GenericUniformityInfo<ContextT>::GenericUniformityInfo(
const DominatorTreeT &DT, const CycleInfoT &CI,
const TargetTransformInfo *TTI) {
DA.reset(new ImplT{DT, CI, TTI});
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
bool haveDivergentArgs = false;
// Control flow instructions may be divergent even if their inputs are
// uniform. Thus, although exceedingly rare, it is possible to have a program
// with no divergent values but with divergent control structures.
if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
DivergentExitCycles.empty()) {
OS << "ALL VALUES UNIFORM\n";
return;
}
for (const auto &entry : DivergentValues) {
const BlockT *parent = Context.getDefBlock(entry);
if (!parent) {
if (!haveDivergentArgs) {
OS << "DIVERGENT ARGUMENTS:\n";
haveDivergentArgs = true;
}
OS << " DIVERGENT: " << Context.print(entry) << '\n';
}
}
if (!AssumedDivergent.empty()) {
OS << "CYCLES ASSSUMED DIVERGENT:\n";
for (const CycleT *cycle : AssumedDivergent) {
OS << " " << cycle->print(Context) << '\n';
}
}
if (!DivergentExitCycles.empty()) {
OS << "CYCLES WITH DIVERGENT EXIT:\n";
for (const CycleT *cycle : DivergentExitCycles) {
OS << " " << cycle->print(Context) << '\n';
}
}
for (auto &block : F) {
OS << "\nBLOCK " << Context.print(&block) << '\n';
OS << "DEFINITIONS\n";
SmallVector<ConstValueRefT, 16> defs;
Context.appendBlockDefs(defs, block);
for (auto value : defs) {
if (isDivergent(value))
OS << " DIVERGENT: ";
else
OS << " ";
OS << Context.print(value) << '\n';
}
OS << "TERMINATORS\n";
SmallVector<const InstructionT *, 8> terms;
Context.appendBlockTerms(terms, block);
bool divergentTerminators = hasDivergentTerminator(block);
for (auto *T : terms) {
if (divergentTerminators)
OS << " DIVERGENT: ";
else
OS << " ";
OS << Context.print(T) << '\n';
}
OS << "END BLOCK\n";
}
}
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergence() const {
return DA->hasDivergence();
}
template <typename ContextT>
const typename ContextT::FunctionT &
GenericUniformityInfo<ContextT>::getFunction() const {
return DA->getFunction();
}
/// Whether \p V is divergent at its definition.
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
return DA->isDivergent(V);
}
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
return DA->isDivergent(*I);
}
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
return DA->isDivergentUse(U);
}
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
return DA->hasDivergentTerminator(B);
}
/// \brief T helper function for printing.
template <typename ContextT>
void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {
DA->print(out);
}
template <typename ContextT>
void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
while (!Stack.empty()) {
auto *NextBB = Stack.back();
if (Finalized.count(NextBB)) {
Stack.pop_back();
continue;
}
LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
<< "\n");
auto *NestedCycle = CI.getCycle(NextBB);
if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
LLVM_DEBUG(dbgs() << " found a cycle\n");
while (NestedCycle->getParentCycle() != Cycle)
NestedCycle = NestedCycle->getParentCycle();
SmallVector<BlockT *, 3> NestedExits;
NestedCycle->getExitBlocks(NestedExits);
bool PushedNodes = false;
for (auto *NestedExitBB : NestedExits) {
LLVM_DEBUG(dbgs() << " examine exit: "
<< CI.getSSAContext().print(NestedExitBB) << "\n");
if (Cycle && !Cycle->contains(NestedExitBB))
continue;
if (Finalized.count(NestedExitBB))
continue;
PushedNodes = true;
Stack.push_back(NestedExitBB);
LLVM_DEBUG(dbgs() << " pushed exit: "
<< CI.getSSAContext().print(NestedExitBB) << "\n");
}
if (!PushedNodes) {
// All loop exits finalized -> finish this node
Stack.pop_back();
computeCyclePO(CI, NestedCycle, Finalized);
}
continue;
}
LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
// DAG-style
bool PushedNodes = false;
for (auto *SuccBB : successors(NextBB)) {
LLVM_DEBUG(dbgs() << " examine succ: "
<< CI.getSSAContext().print(SuccBB) << "\n");
if (Cycle && !Cycle->contains(SuccBB))
continue;
if (Finalized.count(SuccBB))
continue;
PushedNodes = true;
Stack.push_back(SuccBB);
LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
<< "\n");
}
if (!PushedNodes) {
// Never push nodes twice
LLVM_DEBUG(dbgs() << " finishing node: "
<< CI.getSSAContext().print(NextBB) << "\n");
Stack.pop_back();
Finalized.insert(NextBB);
appendBlock(*NextBB);
}
}
LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
}
template <typename ContextT>
void ModifiedPostOrder<ContextT>::computeCyclePO(
const CycleInfoT &CI, const CycleT *Cycle,
SmallPtrSetImpl<const BlockT *> &Finalized) {
LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
SmallVector<const BlockT *> Stack;
auto *CycleHeader = Cycle->getHeader();
LLVM_DEBUG(dbgs() << " noted header: "
<< CI.getSSAContext().print(CycleHeader) << "\n");
assert(!Finalized.count(CycleHeader));
Finalized.insert(CycleHeader);
// Visit the header last
LLVM_DEBUG(dbgs() << " finishing header: "
<< CI.getSSAContext().print(CycleHeader) << "\n");
appendBlock(*CycleHeader, Cycle->isReducible());
// Initialize with immediate successors
for (auto *BB : successors(CycleHeader)) {
LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
<< "\n");
if (!Cycle->contains(BB))
continue;
if (BB == CycleHeader)
continue;
if (!Finalized.count(BB)) {
LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
<< "\n");
Stack.push_back(BB);
}
}
// Compute PO inside region
computeStackPO(Stack, CI, Cycle, Finalized);
LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
}
/// \brief Generically compute the modified post order.
template <typename ContextT>
void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {
SmallPtrSet<const BlockT *, 32> Finalized;
SmallVector<const BlockT *> Stack;
auto *F = CI.getFunction();
Stack.reserve(24); // FIXME made-up number
Stack.push_back(&F->front());
computeStackPO(Stack, CI, nullptr, Finalized);
}
} // namespace llvm
#undef DEBUG_TYPE
#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H