kloessner/probfd/heuristic__search__base_8h_source.html

#ifndef PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H

#define PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H


#include "probfd/algorithms/heuristic_search_state_information.h"

#include "probfd/algorithms/types.h"


#include "probfd/mdp_algorithm.h"

#include "probfd/progress_report.h"


#if defined(EXPENSIVE_STATISTICS)

#include "downward/utils/timer.h"

#endif


#include <algorithm>
#include <iostream>
#include <limits>
#include <memory>
#include <optional>
#include <type_traits>
#include <vector>


// Forward Declarations

// Name-only declarations of probfd class templates; no definitions are
// provided in this block.
namespace probfd {

template <typename>

class Distribution;

template <typename>

struct Transition;

template <typename, typename>

class CostFunction;

} // namespace probfd


// Name-only declarations of class templates from probfd::algorithms; no
// definitions are provided in this block.
namespace probfd::algorithms {

template <typename, typename>

class PolicyPicker;

template <typename>

class SuccessorSampler;

} // namespace probfd::algorithms


// Name-only declarations of the FRET class templates that
// HeuristicSearchBase declares as friends further down in this header.
namespace probfd::algorithms::fret {

template <typename, typename, typename, typename>

class FRET;

template <typename, typename, typename>

class PolicyGraph;

template <typename, typename, typename>

class ValueGraph;

} // namespace probfd::algorithms::fret


namespace probfd::algorithms::heuristic_search {


namespace internal {


/// Base statistics for MDP heuristic search.
///
/// All counters start at zero and both flags/estimates start at their
/// zero/false value; the struct itself only stores them and offers print().
struct Statistics {
    // State counters.
    unsigned long long evaluated_states{0};
    unsigned long long pruned_states{0};
    unsigned long long goal_states{0};

    unsigned long long expanded_states{0};
    unsigned long long terminal_states{0};
    unsigned long long self_loop_states{0};

    // Value / policy update counters.
    unsigned long long value_changes{0};
    unsigned long long policy_changes{0};
    unsigned long long value_updates{0};
    unsigned long long policy_updates{0};

    // Information recorded about the initial state.
    value_t initial_state_estimate{0};
    bool initial_state_found_terminal{false};

#ifdef EXPENSIVE_STATISTICS
    // Timers that only exist when EXPENSIVE_STATISTICS is defined; both are
    // constructed with the argument `true`.
    utils::Timer update_time{true};
    utils::Timer policy_selection_time{true};
#endif

    /// Prints the statistics to the specified output stream.
    void print(std::ostream& out) const;
};


/// Per-state storage of `StateInfo` objects that also implements the
/// StateProperties interface on top of that storage.
///
/// @tparam StateInfo Stored per-state record; must offer get_value(),
///                   get_bounds() and clear().
template <typename StateInfo>
class StateInfos : public StateProperties {
    storage::PerStateStorage<StateInfo> state_infos_;

public:
    /// Mutable access to the record stored for state `sid`.
    StateInfo& operator[](StateID sid)
    {
        return state_infos_[sid];
    }

    /// Read-only access to the record stored for state `sid`.
    const StateInfo& operator[](StateID sid) const
    {
        return state_infos_[sid];
    }

    /// Returns get_value() of the record stored for `state_id`.
    value_t lookup_value(StateID state_id) override
    {
        // Non-const member, so this resolves to the mutable operator[].
        return (*this)[state_id].get_value();
    }

    /// Returns get_bounds() of the record stored for `state_id`.
    Interval lookup_bounds(StateID state_id) override
    {
        return (*this)[state_id].get_bounds();
    }

    /// Calls clear() on every record currently held by the storage.
    void reset()
    {
        for (auto&& info : state_infos_) {
            info.clear();
        }
    }
};


} // namespace internal


/// The common base class for MDP heuristic search algorithms.
///
/// @tparam State      State type of the underlying MDP.
/// @tparam Action     Action type of the underlying MDP.
/// @tparam StateInfoT Per-state record type; must provide the static
///                    constants `StorePolicy` and `UseInterval`.
template <typename State, typename Action, typename StateInfoT>
class HeuristicSearchBase {
    // Yields `const T` when IsConst is true and plain `T` otherwise.
    template <bool IsConst, typename T>
    using const_if = std::conditional_t<IsConst, const T, T>;

protected:
    using MDPType = MDP<State, Action>;
    using CostFunctionType = CostFunction<State, Action>;
    using EvaluatorType = Evaluator<State>;
    using TransitionType = Transition<Action>;

    using PolicyPickerType = PolicyPicker<State, Action>;

    // The FRET implementation is granted access to the internals of this
    // base class.
    template <typename, typename, typename, typename>
    friend class fret::FRET;

    template <typename, typename, typename>
    friend class fret::PolicyGraph;

    template <typename, typename, typename>
    friend class fret::ValueGraph;

public:
    using StateInfo = StateInfoT;

    // Compile-time switches forwarded from the state record type.
    static constexpr bool StorePolicy = StateInfo::StorePolicy;
    static constexpr bool UseInterval = StateInfo::UseInterval;

    // Value type used by the algorithm, selected by UseInterval.
    using AlgorithmValueType = AlgorithmValue<UseInterval>;

private:
    // Algorithm parameters
    const std::shared_ptr<PolicyPickerType> policy_chooser_;

protected:
    // Algorithm state
    internal::StateInfos<StateInfo> state_infos_;

    internal::Statistics statistics_;

    // Pairs a value with an optional transition.
    struct BellmanResult {
        AlgorithmValueType best_value;
        std::optional<TransitionType> transition;
    };

public:
    /// Constructs the base with the policy picker stored as an algorithm
    /// parameter.
    explicit HeuristicSearchBase(
        std::shared_ptr<PolicyPickerType> policy_chooser);

    /// Looks up the current value interval of state_id.
    [[nodiscard]] Interval lookup_bounds(StateID state_id) const;

    /// Checks if the state represented by state_id has been visited yet.
    [[nodiscard]] bool was_visited(StateID state_id) const;

    /// Computes the Bellman operator value for a state.
    AlgorithmValueType compute_bellman(
        CostFunctionType& cost_function,
        StateID state_id,
        const std::vector<TransitionType>& transitions,
        value_t termination_cost) const;

    /// Computes the Bellman operator value for a state together with its
    /// greedy transitions.
    ///
    /// NOTE(review): how `epsilon`, `transitions` and `qvalues` are used is
    /// defined in the implementation header -- confirm there.
    AlgorithmValueType compute_bellman_and_greedy(
        CostFunctionType& cost_function,
        StateID state_id,
        std::vector<TransitionType>& transitions,
        value_t termination_cost,
        std::vector<AlgorithmValueType>& qvalues,
        value_t epsilon = g_epsilon) const;

    /// Selects a greedy transition from the given list of greedy
    /// transitions (presumably via the stored policy picker -- confirm in
    /// the implementation header).
    std::optional<TransitionType> select_greedy_transition(
        MDPType& mdp,
        std::optional<Action> previous_greedy_action,
        std::vector<TransitionType>& greedy_transitions);

    /// Updates the value of the state associated with the given storage.
    bool update_value(
        StateInfo& state_info,
        AlgorithmValueType other,
        value_t epsilon = g_epsilon);

    /// Updates the current greedy action of the state associated with the
    /// given storage. Only available when StorePolicy is true.
    bool update_policy(
        StateInfo& state_info,
        const std::optional<TransitionType>& transition)
        requires(StorePolicy);

protected:
    // The helpers below are declared here and defined in the implementation
    // header included at the end of this file.
    void initialize_initial_state(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state);

    void expand_and_initialize(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state,
        StateInfo& state_info,
        std::vector<TransitionType>& transitions);

    void generate_non_tip_transitions(
        MDPType& mdp,
        param_type<State> state,
        std::vector<TransitionType>& transitions) const;

    void print_statistics(std::ostream& out) const;

private:
    void initialize(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state,
        StateInfo& state_info);

    AlgorithmValueType compute_qvalue(
        value_t action_cost,
        StateID state_id,
        const TransitionType& transition) const;

    AlgorithmValueType compute_q_values(
        CostFunctionType& cost_function,
        StateID state_id,
        std::vector<TransitionType>& transitions,
        value_t termination_cost,
        std::vector<AlgorithmValueType>& qvalues) const;

    AlgorithmValueType filter_greedy_transitions(
        std::vector<TransitionType>& transitions,
        std::vector<AlgorithmValueType>& qvalues,
        const AlgorithmValueType& best_value,
        value_t epsilon = g_epsilon) const;
};


/// Extends HeuristicSearchBase with default implementations for
/// MDPAlgorithm.
///
/// Subclasses supply the actual search in do_solve() and any extra output in
/// print_additional_statistics(); solve(), compute_policy() and
/// print_statistics() are final.
///
/// @tparam State      State type of the underlying MDP.
/// @tparam Action     Action type of the underlying MDP.
/// @tparam StateInfoT Per-state record type forwarded to HeuristicSearchBase.
template <typename State, typename Action, typename StateInfoT>
class HeuristicSearchAlgorithm
    : public MDPAlgorithm<State, Action>
    , public HeuristicSearchBase<State, Action, StateInfoT> {
    // Both base classes are named through their injected class names.
    using AlgorithmBase = typename HeuristicSearchAlgorithm::MDPAlgorithm;
    using HSBase = typename HeuristicSearchAlgorithm::HeuristicSearchBase;

public:
    // `typename` is spelled out for these dependent types, matching the
    // other aliases of this class and keeping the header usable on compilers
    // that do not treat it as optional in alias declarations.
    using TransitionType = typename HSBase::TransitionType;
    using AlgorithmValueType = typename HSBase::AlgorithmValueType;

protected:
    using PolicyType = typename AlgorithmBase::PolicyType;

    using MDPType = typename AlgorithmBase::MDPType;
    using EvaluatorType = typename AlgorithmBase::EvaluatorType;

    using StateInfo = typename HSBase::StateInfo;
    using PolicyPicker = typename HSBase::PolicyPickerType;

public:
    // Inherited constructor
    using HSBase::HSBase;

    /// Final implementation of the MDPAlgorithm solve entry point; defined
    /// in the implementation header.
    Interval solve(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state,
        ProgressReport progress,
        double max_time) final;

    /// Final implementation of the MDPAlgorithm policy computation entry
    /// point; defined in the implementation header.
    std::unique_ptr<PolicyType> compute_policy(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state,
        ProgressReport progress,
        double max_time) final;

    /// Prints algorithm statistics to the specified output stream.
    void print_statistics(std::ostream& out) const final;

    /// Solves for the optimal state value of the input state.
    virtual Interval do_solve(
        MDPType& mdp,
        EvaluatorType& h,
        param_type<State> state,
        ProgressReport& progress,
        double max_time) = 0;

    /// Prints additional statistics to the output stream.
    virtual void print_additional_statistics(std::ostream& out) const = 0;
};


/// Heuristic search algorithm that can be used within FRET.
///
/// Adds the reset_search_state() hook on top of HeuristicSearchAlgorithm.
///
/// @tparam State      State type of the underlying MDP.
/// @tparam Action     Action type of the underlying MDP.
/// @tparam StateInfoT Per-state record type forwarded to the base class.
template <typename State, typename Action, typename StateInfoT>
class FRETHeuristicSearchAlgorithm
    : public HeuristicSearchAlgorithm<State, Action, StateInfoT> {
    // Base classes named through their injected class names.
    using AlgorithmBase = typename FRETHeuristicSearchAlgorithm::MDPAlgorithm;
    using HSBase = typename FRETHeuristicSearchAlgorithm::HeuristicSearchAlgorithm;

protected:
    using PolicyType = typename AlgorithmBase::PolicyType;

    using MDPType = typename AlgorithmBase::MDPType;
    using EvaluatorType = typename AlgorithmBase::EvaluatorType;

    using StateInfo = typename HSBase::StateInfo;
    using PolicyPicker = typename HSBase::PolicyPickerType;

public:
    // Inherited constructor
    using HSBase::HSBase;

    /// Resets the heuristic search algorithm object to a clean state.
    /// The default implementation does nothing.
    virtual void reset_search_state() {}
};


} // namespace probfd::algorithms::heuristic_search


#define GUARD_INCLUDE_PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H

#include "probfd/algorithms/heuristic_search_base_impl.h"

#undef GUARD_INCLUDE_PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H


#endif // PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H

probfd::CostFunction< State, Action >

probfd::Evaluator< State >

probfd::MDPAlgorithm
Interface for MDP algorithm implementations.
Definition mdp_algorithm.h:29

probfd::MDP< State, Action >

probfd::ProgressReport
A registry for print functions related to search progress.
Definition progress_report.h:33

probfd::algorithms::PolicyPicker
A strategy interface used to break ties between multiple greedy actions for a state.
Definition policy_picker.h:57

probfd::algorithms::StateProperties
Interface providing access to various state properties during heuristic search.
Definition state_properties.h:22

probfd::algorithms::fret::FRET
Implementation of the Find-Revise-Eliminate-Traps (FRET) framework [kolobov:etal:icaps-11].
Definition heuristic_search_base.h:39

probfd::algorithms::heuristic_search::FRETHeuristicSearchAlgorithm
Heuristic search algorithm that can be used within FRET.
Definition heuristic_search_base.h:368

probfd::algorithms::heuristic_search::FRETHeuristicSearchAlgorithm::reset_search_state
virtual void reset_search_state()
Resets the heuristic search algorithm object to a clean state.
Definition heuristic_search_base.h:392

probfd::algorithms::heuristic_search::HeuristicSearchAlgorithm
Extends HeuristicSearchBase with default implementations for MDPAlgorithm.
Definition heuristic_search_base.h:302

probfd::algorithms::heuristic_search::HeuristicSearchAlgorithm::print_additional_statistics
virtual void print_additional_statistics(std::ostream &out) const =0
Prints additional statistics to the output stream.

probfd::algorithms::heuristic_search::HeuristicSearchAlgorithm::print_statistics
void print_statistics(std::ostream &out) const final
Prints algorithm statistics to the specified output stream.
Definition heuristic_search_base_impl.h:484

probfd::algorithms::heuristic_search::HeuristicSearchAlgorithm::do_solve
virtual Interval do_solve(MDPType &mdp, EvaluatorType &h, param_type< State > state, ProgressReport &progress, double max_time)=0
Solves for the optimal state value of the input state.

probfd::algorithms::heuristic_search::HeuristicSearchBase
The common base class for MDP heuristic search algorithms.
Definition heuristic_search_base.h:113

probfd::algorithms::heuristic_search::HeuristicSearchBase::update_value
bool update_value(StateInfo &state_info, AlgorithmValueType other, value_t epsilon=g_epsilon)
Updates the value of the state associated with the given storage.
Definition heuristic_search_base_impl.h:153

probfd::algorithms::heuristic_search::HeuristicSearchBase::compute_bellman
AlgorithmValueType compute_bellman(CostFunctionType &cost_function, StateID state_id, const std::vector< TransitionType > &transitions, value_t termination_cost) const
Computes the Bellman operator value for a state.
Definition heuristic_search_base_impl.h:74

probfd::algorithms::heuristic_search::HeuristicSearchBase::lookup_bounds
Interval lookup_bounds(StateID state_id) const
Looks up the current value interval of state_id.
Definition heuristic_search_base_impl.h:60

probfd::algorithms::heuristic_search::HeuristicSearchBase::update_policy
bool update_policy(StateInfo &state_info, const std::optional< TransitionType > &transition)
Updates the current greedy action of the state associated with the given storage.
Definition heuristic_search_base_impl.h:165

probfd::algorithms::heuristic_search::HeuristicSearchBase::select_greedy_transition
std::optional< TransitionType > select_greedy_transition(MDPType &mdp, std::optional< Action > previous_greedy_action, std::vector< TransitionType > &greedy_transitions)
Selects a greedy transition from the given list of greedy transitions through the policy selector pas...
Definition heuristic_search_base_impl.h:130

probfd::algorithms::heuristic_search::HeuristicSearchBase::was_visited
bool was_visited(StateID state_id) const
Checks if the state represented by state_id has been visited yet.
Definition heuristic_search_base_impl.h:67

probfd::algorithms::heuristic_search::HeuristicSearchBase::compute_bellman_and_greedy
AlgorithmValueType compute_bellman_and_greedy(CostFunctionType &cost_function, StateID state_id, std::vector< TransitionType > &transitions, value_t termination_cost, std::vector< AlgorithmValueType > &qvalues, value_t epsilon=g_epsilon) const
Computes the Bellman operator value for a state, as well as all transitions achieving a value epsilon...
Definition heuristic_search_base_impl.h:95

probfd::algorithms::fret
Namespace dedicated to the Find, Revise, Eliminate Traps (FRET) framework.
Definition fret.h:23

probfd::algorithms::heuristic_search
Namespace dedicated to the MDP heuristic search base implementation.
Definition heuristic_search_base.h:47

probfd::algorithms
This namespace contains implementations of SSP search algorithms.
Definition acyclic_value_iteration.h:22

probfd::algorithms::AlgorithmValue
std::conditional_t< UseInterval, Interval, value_t > AlgorithmValue
Convenience value type alias for algorithms selecting interval iteration behaviour based on a templat...
Definition types.h:14

probfd
The top-level namespace of probabilistic Fast Downward.
Definition command_line.h:8

probfd::value_t
double value_t
Typedef for the state value type.
Definition aliases.h:7

probfd::param_type
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25

probfd::g_epsilon
value_t g_epsilon
The default tolerance value for approximate comparisons.

probfd::Interval
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12

probfd::StateID
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22

probfd::algorithms::heuristic_search::internal::Statistics
Base statistics for MDP heuristic search.
Definition heuristic_search_base.h:54

probfd::algorithms::heuristic_search::internal::Statistics::print
void print(std::ostream &out) const
Prints the statistics to the specified output stream.
Definition heuristic_search_base_impl.h:26