clang 20.0.0 (based on r547379) from build 12806354. Bug: http://b/379133546 Test: N/A Change-Id: I2eb8938af55d809de674be63cb30cf27e801862b Upstream-Commit: ad834e67b1105d15ef907f6255d4c96e8e733f57
503 lines
20 KiB
C++
503 lines
20 KiB
C++
//== RangedConstraintManager.h ----------------------------------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Ranged constraint manager, built on SimpleConstraintManager.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
|
|
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
|
|
|
|
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
|
|
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
|
|
#include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h"
|
|
#include "llvm/ADT/APSInt.h"
|
|
#include "llvm/Support/Allocator.h"
|
|
|
|
namespace clang {
|
|
|
|
namespace ento {
|
|
|
|
/// A Range represents the closed range [from, to]. The caller must
|
|
/// guarantee that from <= to. Note that Range is immutable, so as not
|
|
/// to subvert RangeSet's immutability.
|
|
class Range {
|
|
public:
|
|
Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) {
|
|
assert(From <= To);
|
|
}
|
|
|
|
Range(const llvm::APSInt &Point) : Range(Point, Point) {}
|
|
|
|
bool Includes(const llvm::APSInt &Point) const {
|
|
return From() <= Point && Point <= To();
|
|
}
|
|
const llvm::APSInt &From() const { return *Impl.first; }
|
|
const llvm::APSInt &To() const { return *Impl.second; }
|
|
const llvm::APSInt *getConcreteValue() const {
|
|
return &From() == &To() ? &From() : nullptr;
|
|
}
|
|
|
|
void Profile(llvm::FoldingSetNodeID &ID) const {
|
|
ID.AddPointer(&From());
|
|
ID.AddPointer(&To());
|
|
}
|
|
void dump(raw_ostream &OS) const;
|
|
void dump() const;
|
|
|
|
// In order to keep non-overlapping ranges sorted, we can compare only From
|
|
// points.
|
|
bool operator<(const Range &RHS) const { return From() < RHS.From(); }
|
|
|
|
bool operator==(const Range &RHS) const { return Impl == RHS.Impl; }
|
|
bool operator!=(const Range &RHS) const { return !operator==(RHS); }
|
|
|
|
private:
|
|
std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl;
|
|
};
|
|
|
|
/// @class RangeSet is a persistent set of non-overlapping ranges.
|
|
///
|
|
/// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which
|
|
/// also supports the most common operations performed on range sets.
|
|
///
|
|
/// Empty set corresponds to an overly constrained symbol meaning that there
|
|
/// are no possible values for that symbol.
|
|
class RangeSet {
|
|
public:
|
|
class Factory;
|
|
|
|
private:
|
|
// We use llvm::SmallVector as the underlying container for the following
|
|
// reasons:
|
|
//
|
|
// * Range sets are usually very simple, 1 or 2 ranges.
|
|
// That's why llvm::ImmutableSet is not perfect.
|
|
//
|
|
// * Ranges in sets are NOT overlapping, so it is natural to keep them
|
|
// sorted for efficient operations and queries. For this reason,
|
|
// llvm::SmallSet doesn't fit the requirements, it is not sorted when it
|
|
// is a vector.
|
|
//
|
|
// * Range set operations usually a bit harder than add/remove a range.
|
|
// Complex operations might do many of those for just one range set.
|
|
// Formerly it used to be llvm::ImmutableSet, which is inefficient for our
|
|
// purposes as we want to make these operations BOTH immutable AND
|
|
// efficient.
|
|
//
|
|
// * Iteration over ranges is widespread and a more cache-friendly
|
|
// structure is preferred.
|
|
using ImplType = llvm::SmallVector<Range, 4>;
|
|
|
|
struct ContainerType : public ImplType, public llvm::FoldingSetNode {
|
|
void Profile(llvm::FoldingSetNodeID &ID) const {
|
|
for (const Range &It : *this) {
|
|
It.Profile(ID);
|
|
}
|
|
}
|
|
};
|
|
// This is a non-owning pointer to an actual container.
|
|
// The memory is fully managed by the factory and is alive as long as the
|
|
// factory itself is alive.
|
|
// It is a pointer as opposed to a reference, so we can easily reassign
|
|
// RangeSet objects.
|
|
using UnderlyingType = const ContainerType *;
|
|
UnderlyingType Impl;
|
|
|
|
public:
|
|
using const_iterator = ImplType::const_iterator;
|
|
|
|
const_iterator begin() const { return Impl->begin(); }
|
|
const_iterator end() const { return Impl->end(); }
|
|
size_t size() const { return Impl->size(); }
|
|
|
|
bool isEmpty() const { return Impl->empty(); }
|
|
|
|
class Factory {
|
|
public:
|
|
Factory(BasicValueFactory &BV) : ValueFactory(BV) {}
|
|
|
|
/// Create a new set with all ranges from both LHS and RHS.
|
|
/// Possible intersections are not checked here.
|
|
///
|
|
/// Complexity: O(N + M)
|
|
/// where N = size(LHS), M = size(RHS)
|
|
RangeSet add(RangeSet LHS, RangeSet RHS);
|
|
/// Create a new set with all ranges from the original set plus the new one.
|
|
/// Possible intersections are not checked here.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(Original)
|
|
RangeSet add(RangeSet Original, Range Element);
|
|
/// Create a new set with all ranges from the original set plus the point.
|
|
/// Possible intersections are not checked here.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(Original)
|
|
RangeSet add(RangeSet Original, const llvm::APSInt &Point);
|
|
/// Create a new set which is a union of two given ranges.
|
|
/// Possible intersections are not checked here.
|
|
///
|
|
/// Complexity: O(N + M)
|
|
/// where N = size(LHS), M = size(RHS)
|
|
RangeSet unite(RangeSet LHS, RangeSet RHS);
|
|
/// Create a new set by uniting given range set with the given range.
|
|
/// All intersections and adjacent ranges are handled here.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(Original)
|
|
RangeSet unite(RangeSet Original, Range Element);
|
|
/// Create a new set by uniting given range set with the given point.
|
|
/// All intersections and adjacent ranges are handled here.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(Original)
|
|
RangeSet unite(RangeSet Original, llvm::APSInt Point);
|
|
/// Create a new set by uniting given range set with the given range
|
|
/// between points. All intersections and adjacent ranges are handled here.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(Original)
|
|
RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To);
|
|
|
|
RangeSet getEmptySet() { return &EmptySet; }
|
|
|
|
/// Create a new set with just one range.
|
|
/// @{
|
|
RangeSet getRangeSet(Range Origin);
|
|
RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) {
|
|
return getRangeSet(Range(From, To));
|
|
}
|
|
RangeSet getRangeSet(const llvm::APSInt &Origin) {
|
|
return getRangeSet(Origin, Origin);
|
|
}
|
|
/// @}
|
|
|
|
/// Intersect the given range sets.
|
|
///
|
|
/// Complexity: O(N + M)
|
|
/// where N = size(LHS), M = size(RHS)
|
|
RangeSet intersect(RangeSet LHS, RangeSet RHS);
|
|
/// Intersect the given set with the closed range [Lower, Upper].
|
|
///
|
|
/// Unlike the Range type, this range uses modular arithmetic, corresponding
|
|
/// to the common treatment of C integer overflow. Thus, if the Lower bound
|
|
/// is greater than the Upper bound, the range is taken to wrap around. This
|
|
/// is equivalent to taking the intersection with the two ranges [Min,
|
|
/// Upper] and [Lower, Max], or, alternatively, /removing/ all integers
|
|
/// between Upper and Lower.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(What)
|
|
RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper);
|
|
/// Intersect the given range with the given point.
|
|
///
|
|
/// The result can be either an empty set or a set containing the given
|
|
/// point depending on whether the point is in the range set.
|
|
///
|
|
/// Complexity: O(logN)
|
|
/// where N = size(What)
|
|
RangeSet intersect(RangeSet What, llvm::APSInt Point);
|
|
|
|
/// Delete the given point from the range set.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(From)
|
|
RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point);
|
|
/// Negate the given range set.
|
|
///
|
|
/// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus
|
|
/// operation under the values of the type.
|
|
///
|
|
/// We also handle MIN because applying unary minus to MIN does not change
|
|
/// it.
|
|
/// Example 1:
|
|
/// char x = -128; // -128 is a MIN value in a range of 'char'
|
|
/// char y = -x; // y: -128
|
|
///
|
|
/// Example 2:
|
|
/// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char'
|
|
/// unsigned char y = -x; // y: 0
|
|
///
|
|
/// And it makes us to separate the range
|
|
/// like [MIN, N] to [MIN, MIN] U [-N, MAX].
|
|
/// For instance, whole range is {-128..127} and subrange is [-128,-126],
|
|
/// thus [-128,-127,-126,...] negates to [-128,...,126,127].
|
|
///
|
|
/// Negate restores disrupted ranges on bounds,
|
|
/// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B].
|
|
///
|
|
/// Negate is a self-inverse function, i.e. negate(negate(R)) == R.
|
|
///
|
|
/// Complexity: O(N)
|
|
/// where N = size(What)
|
|
RangeSet negate(RangeSet What);
|
|
/// Performs promotions, truncations and conversions of the given set.
|
|
///
|
|
/// This function is optimized for each of the six cast cases:
|
|
/// - noop
|
|
/// - conversion
|
|
/// - truncation
|
|
/// - truncation-conversion
|
|
/// - promotion
|
|
/// - promotion-conversion
|
|
///
|
|
/// NOTE: This function is NOT self-inverse for truncations, because of
|
|
/// the higher bits loss:
|
|
/// - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt.
|
|
/// - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar.
|
|
/// But it is self-inverse for all the rest casts.
|
|
///
|
|
/// Complexity:
|
|
/// - Noop O(1);
|
|
/// - Truncation O(N^2);
|
|
/// - Another case O(N);
|
|
/// where N = size(What)
|
|
RangeSet castTo(RangeSet What, APSIntType Ty);
|
|
RangeSet castTo(RangeSet What, QualType T);
|
|
|
|
/// Return associated value factory.
|
|
BasicValueFactory &getValueFactory() const { return ValueFactory; }
|
|
|
|
private:
|
|
/// Return a persistent version of the given container.
|
|
RangeSet makePersistent(ContainerType &&From);
|
|
/// Construct a new persistent version of the given container.
|
|
ContainerType *construct(ContainerType &&From);
|
|
|
|
RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS);
|
|
/// NOTE: This function relies on the fact that all values in the
|
|
/// containers are persistent (created via BasicValueFactory::getValue).
|
|
ContainerType unite(const ContainerType &LHS, const ContainerType &RHS);
|
|
|
|
/// This is a helper function for `castTo` method. Implies not to be used
|
|
/// separately.
|
|
/// Performs a truncation case of a cast operation.
|
|
ContainerType truncateTo(RangeSet What, APSIntType Ty);
|
|
|
|
/// This is a helper function for `castTo` method. Implies not to be used
|
|
/// separately.
|
|
/// Performs a conversion case and a promotion-conversion case for signeds
|
|
/// of a cast operation.
|
|
ContainerType convertTo(RangeSet What, APSIntType Ty);
|
|
|
|
/// This is a helper function for `castTo` method. Implies not to be used
|
|
/// separately.
|
|
/// Performs a promotion for unsigneds only.
|
|
ContainerType promoteTo(RangeSet What, APSIntType Ty);
|
|
|
|
// Many operations include producing new APSInt values and that's why
|
|
// we need this factory.
|
|
BasicValueFactory &ValueFactory;
|
|
// Allocator for all the created containers.
|
|
// Containers might own their own memory and that's why it is specific
|
|
// for the type, so it calls container destructors upon deletion.
|
|
llvm::SpecificBumpPtrAllocator<ContainerType> Arena;
|
|
// Usually we deal with the same ranges and range sets over and over.
|
|
// Here we track all created containers and try not to repeat ourselves.
|
|
llvm::FoldingSet<ContainerType> Cache;
|
|
static ContainerType EmptySet;
|
|
};
|
|
|
|
RangeSet(const RangeSet &) = default;
|
|
RangeSet &operator=(const RangeSet &) = default;
|
|
RangeSet(RangeSet &&) = default;
|
|
RangeSet &operator=(RangeSet &&) = default;
|
|
~RangeSet() = default;
|
|
|
|
/// Construct a new RangeSet representing '{ [From, To] }'.
|
|
RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To)
|
|
: RangeSet(F.getRangeSet(From, To)) {}
|
|
|
|
/// Construct a new RangeSet representing the given point as a range.
|
|
RangeSet(Factory &F, const llvm::APSInt &Point)
|
|
: RangeSet(F.getRangeSet(Point)) {}
|
|
|
|
static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) {
|
|
ID.AddPointer(RS.Impl);
|
|
}
|
|
|
|
/// Profile - Generates a hash profile of this RangeSet for use
|
|
/// by FoldingSet.
|
|
void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
|
|
|
|
/// getConcreteValue - If a symbol is constrained to equal a specific integer
|
|
/// constant then this method returns that value. Otherwise, it returns
|
|
/// NULL.
|
|
const llvm::APSInt *getConcreteValue() const {
|
|
return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr;
|
|
}
|
|
|
|
/// Get the minimal value covered by the ranges in the set.
|
|
///
|
|
/// Complexity: O(1)
|
|
const llvm::APSInt &getMinValue() const;
|
|
/// Get the maximal value covered by the ranges in the set.
|
|
///
|
|
/// Complexity: O(1)
|
|
const llvm::APSInt &getMaxValue() const;
|
|
|
|
bool isUnsigned() const;
|
|
uint32_t getBitWidth() const;
|
|
APSIntType getAPSIntType() const;
|
|
|
|
/// Test whether the given point is contained by any of the ranges.
|
|
///
|
|
/// Complexity: O(logN)
|
|
/// where N = size(this)
|
|
bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
|
|
|
|
bool containsZero() const {
|
|
APSIntType T{getMinValue()};
|
|
return contains(T.getZeroValue());
|
|
}
|
|
|
|
/// Test if the range is the [0,0] range.
|
|
///
|
|
/// Complexity: O(1)
|
|
bool encodesFalseRange() const {
|
|
const llvm::APSInt *Constant = getConcreteValue();
|
|
return Constant && Constant->isZero();
|
|
}
|
|
|
|
/// Test if the range doesn't contain zero.
|
|
///
|
|
/// Complexity: O(logN)
|
|
/// where N = size(this)
|
|
bool encodesTrueRange() const { return !containsZero(); }
|
|
|
|
void dump(raw_ostream &OS) const;
|
|
void dump() const;
|
|
|
|
bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
|
|
bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
|
|
|
|
private:
|
|
/* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {}
|
|
/* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {}
|
|
|
|
/// Pin given points to the type represented by the current range set.
|
|
///
|
|
/// This makes parameter points to be in-out parameters.
|
|
/// In order to maintain consistent types across all of the ranges in the set
|
|
/// and to keep all the operations to compare ONLY points of the same type, we
|
|
/// need to pin every point before any operation.
|
|
///
|
|
/// @Returns true if the given points can be converted to the target type
|
|
/// without changing the values (i.e. trivially) and false otherwise.
|
|
/// @{
|
|
bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
|
|
bool pin(llvm::APSInt &Point) const;
|
|
/// @}
|
|
|
|
// This version of this function modifies its arguments (pins it).
|
|
bool containsImpl(llvm::APSInt &Point) const;
|
|
|
|
friend class Factory;
|
|
};
|
|
|
|
using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
|
|
ConstraintMap getConstraintMap(ProgramStateRef State);
|
|
|
|
class RangedConstraintManager : public SimpleConstraintManager {
|
|
public:
|
|
RangedConstraintManager(ExprEngine *EE, SValBuilder &SB)
|
|
: SimpleConstraintManager(EE, SB) {}
|
|
|
|
~RangedConstraintManager() override;
|
|
|
|
//===------------------------------------------------------------------===//
|
|
// Implementation for interface from SimpleConstraintManager.
|
|
//===------------------------------------------------------------------===//
|
|
|
|
ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym,
|
|
bool Assumption) override;
|
|
|
|
ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &From,
|
|
const llvm::APSInt &To,
|
|
bool InRange) override;
|
|
|
|
ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym,
|
|
bool Assumption) override;
|
|
|
|
protected:
|
|
/// Assume a constraint between a symbolic expression and a concrete integer.
|
|
virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym,
|
|
BinaryOperator::Opcode op,
|
|
const llvm::APSInt &Int);
|
|
|
|
//===------------------------------------------------------------------===//
|
|
// Interface that subclasses must implement.
|
|
//===------------------------------------------------------------------===//
|
|
|
|
// Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison
|
|
// operation for the method being invoked.
|
|
|
|
virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
|
|
const llvm::APSInt &V,
|
|
const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymWithinInclusiveRange(
|
|
ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
|
|
const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
|
|
|
|
virtual ProgramStateRef assumeSymOutsideInclusiveRange(
|
|
ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
|
|
const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
|
|
|
|
//===------------------------------------------------------------------===//
|
|
// Internal implementation.
|
|
//===------------------------------------------------------------------===//
|
|
private:
|
|
static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment);
|
|
};
|
|
|
|
/// Try to simplify a given symbolic expression based on the constraints in
|
|
/// State. This is needed because the Environment bindings are not getting
|
|
/// updated when a new constraint is added to the State. If the symbol is
|
|
/// simplified to a non-symbol (e.g. to a constant) then the original symbol
|
|
/// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE
|
|
/// functions where we can work only with symbols. Use the other function
|
|
/// (simplifyToSVal) if you are interested in a simplification that may yield
|
|
/// a concrete constant value.
|
|
SymbolRef simplify(ProgramStateRef State, SymbolRef Sym);
|
|
|
|
/// Try to simplify a given symbolic expression's associated `SVal` based on the
|
|
/// constraints in State. This is very similar to `simplify`, but this function
|
|
/// always returns the simplified SVal. The simplified SVal might be a single
|
|
/// constant (i.e. `ConcreteInt`).
|
|
SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym);
|
|
|
|
} // namespace ento
|
|
} // namespace clang
|
|
|
|
// NOTE(review): this macro is invoked at global scope, after namespaces
// clang::ento have been closed -- presumably it opens the namespaces it needs
// itself; confirm in ProgramStateTrait.h before relocating it.
REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap)
|
|
|
|
#endif
|