kloessner/probfd/acyclic__value__iteration_8h_source.html

#ifndef PROBFD_ALGORITHMS_ACYCLIC_VALUE_ITERATION_H

#define PROBFD_ALGORITHMS_ACYCLIC_VALUE_ITERATION_H


#include "probfd/storage/per_state_storage.h"


#include "probfd/distribution.h"

#include "probfd/mdp_algorithm.h"


#include <stack>


// Forward Declarations

// This header only names CountdownTimer through a reference parameter,
// so a forward declaration is enough here.
namespace utils {
class CountdownTimer;
} // namespace utils


namespace probfd::policies {
// Forward declaration: this header only uses MapPolicy<State, Action>
// through pointers, so its definition is not needed here.
template <typename State, typename Action>
class MapPolicy;
} // namespace probfd::policies


namespace probfd::algorithms::acyclic_vi {


/**
 * @brief Models an observer subscribed to events of the acyclic value
 * iteration algorithm.
 *
 * Every notification is a pure virtual function, so concrete observers have
 * to implement all four of them.
 *
 * @tparam State The state type handed to each notification.
 * @tparam Action The action type; no notification of this interface uses it.
 */
template <typename State, typename Action>
class AcyclicValueIterationObserver {
public:
    /// Virtual so that observers can be destroyed through this interface.
    virtual ~AcyclicValueIterationObserver() = default;

    /// Called when the algorithm selects a state for expansion.
    virtual void on_state_selected_for_expansion(const State& state) = 0;

    /// Called when a goal state is encountered during the expansion check.
    virtual void on_goal_state(const State& state) = 0;

    /// Called when a terminal state is encountered during the expansion
    /// check.
    virtual void on_terminal_state(const State& state) = 0;

    /// Called when a state is pruned during the expansion check.
    virtual void on_pruned_state(const State& state) = 0;
};


/**
 * @brief An observer that collects basic statistics of the acyclic value
 * iteration algorithm.
 *
 * All counters start at zero. The member functions are only declared here;
 * their definitions are not part of this header.
 * NOTE(review): each callback presumably increments the like-named counter --
 * confirm against the definitions.
 *
 * @tparam State The state type handed to each notification.
 * @tparam Action The action type of the observed algorithm.
 */
template <typename State, typename Action>
struct StatisticsObserver
    : public AcyclicValueIterationObserver<State, Action> {
    unsigned long long state_expansions = 0;
    unsigned long long terminal_states = 0;
    unsigned long long goal_states = 0;
    unsigned long long pruned_states = 0;

    // Marked `override` so that any signature drift in the observer interface
    // becomes a compile error instead of silently declaring new overloads.

    /// Called when the algorithm selects a state for expansion.
    void on_state_selected_for_expansion(const State&) override;

    /// Called when a goal state is encountered during the expansion check.
    void on_goal_state(const State&) override;

    /// Called when a terminal state is encountered during the expansion
    /// check.
    void on_terminal_state(const State&) override;

    /// Called when a state is pruned during the expansion check.
    void on_pruned_state(const State&) override;

    /// Writes to the given output stream.
    /// NOTE(review): presumably prints the collected counters -- confirm
    /// against the definition.
    void print(std::ostream& out) const;
};


namespace internal {


/**
 * @brief Record kept for a single state by the acyclic value iteration
 * algorithm.
 *
 * @tparam Action The action type stored in best_action.
 */
template <typename Action>
struct StateInfo {
    /// Processing status of the state, stored in a single byte.
    /// NOTE(review): the names suggest a depth-first traversal (unvisited /
    /// on the expansion stack / finished) -- confirm against the
    /// implementation.
    enum Status : uint8_t { NEW, ON_STACK, CLOSED };

    /// A fresh record starts out as NEW.
    Status status{NEW};

    /// Empty until an action has been recorded for the state.
    std::optional<Action> best_action{};

    /// Initialized to -INFINITE_VALUE.
    value_t value = -INFINITE_VALUE;
};


/**
 * @brief Holds the observers registered with the algorithm and offers one
 * notification entry point per observer event.
 *
 * The member functions are only declared here; their definitions are not
 * part of this header.
 * NOTE(review): each notify_* function presumably forwards the state to the
 * matching on_* callback of every registered observer -- confirm against the
 * definitions.
 *
 * @tparam State The state type handed to each notification.
 * @tparam Action The action type of the observed algorithm.
 */
template <typename State, typename Action>
class AcyclicValueIterationObserverCollection {
    using Observer = AcyclicValueIterationObserver<State, Action>;

    // Observer is an abstract interface, so it cannot be stored by value:
    // the elements must be (shared) pointers, which is also the type
    // register_observer() receives.
    std::vector<std::shared_ptr<Observer>> observers_;

public:
    /// Registers an observer; ownership is shared with the caller.
    void register_observer(std::shared_ptr<Observer> observer);

    void notify_state_selected_for_expansion(const State&);
    void notify_goal_state(const State&);
    void notify_terminal_state(const State&);
    void notify_pruned_state(const State&);
};


} // namespace internal


/**
 * @brief Implements acyclic Value Iteration.
 *
 * Only the interface is declared in this block; the member function
 * definitions live elsewhere.
 *
 * @tparam State The state type of the underlying MDP.
 * @tparam Action The action type of the underlying MDP.
 */
template <typename State, typename Action>
class AcyclicValueIteration : public MDPAlgorithm<State, Action> {
    using Base = MDPAlgorithm<State, Action>;

    // Types inherited from the algorithm interface.
    using PolicyType = typename Base::PolicyType;
    using MDPType = typename Base::MDPType;
    using EvaluatorType = typename Base::EvaluatorType;

    using MapPolicy = policies::MapPolicy<State, Action>;

    using StateInfo = internal::StateInfo<Action>;

    using Observer = AcyclicValueIterationObserver<State, Action>;
    using ObserverCollection =
        internal::AcyclicValueIterationObserverCollection<State, Action>;

    /// Bookkeeping for one state whose expansion is in progress; instances
    /// are the elements of expansion_stack_.
    struct IncrementalExpansionInfo {
        const StateID state_id;

        // Held by reference, so the referenced record must outlive this
        // object.
        StateInfo& state_info;

        // Applicable operators that still have to be expanded.
        std::vector<Action> remaining_aops;

        // The transition currently being processed, and the position of the
        // current successor within it.
        Distribution<StateID> transition;
        typename Distribution<StateID>::const_iterator successor;

        // Q-value of the current transition.
        value_t t_value;

        IncrementalExpansionInfo(StateID state_id, StateInfo& state_info);

        void setup_transition(MDPType& mdp);

        void backtrack_successor(value_t probability, StateInfo& succ_info);

        // NOTE(review): `policy` is a raw pointer and presumably may be
        // null -- confirm against the definition.
        bool advance(MDPType& mdp, MapPolicy* policy);

    private:
        bool next_successor();
        bool next_transition(MDPType& mdp, MapPolicy* policy);

        void finalize_transition();
        void finalize_expansion(MapPolicy* policy);
    };

    storage::PerStateStorage<StateInfo> state_infos_;
    std::stack<IncrementalExpansionInfo> expansion_stack_;

    ObserverCollection observers_;

public:
    /// Overrides the policy computation entry point of the base interface.
    std::unique_ptr<PolicyType> compute_policy(
        MDPType& mdp,
        EvaluatorType& heuristic,
        param_type<State> initial_state,
        ProgressReport progress,
        double max_time) override;

    /// Overrides the solving entry point of the base interface.
    Interval solve(
        MDPType& mdp,
        EvaluatorType& heuristic,
        param_type<State> initial_state,
        ProgressReport progress,
        double max_time) override;

    /// Non-virtual overload that takes a policy pointer instead of a
    /// progress report.
    /// NOTE(review): `policy` is presumably an optional output -- confirm
    /// against the definition.
    Interval solve(
        MDPType& mdp,
        EvaluatorType& heuristic,
        param_type<State> initial_state,
        double max_time,
        MapPolicy* policy);

    /// Registers an observer; ownership is shared with the caller.
    void register_observer(std::shared_ptr<Observer> observer);

private:
    bool push_successor(
        MDPType& mdp,
        MapPolicy* policy,
        IncrementalExpansionInfo& e,
        utils::CountdownTimer& timer);

    bool expand_state(
        MDPType& mdp,
        EvaluatorType& heuristic,
        IncrementalExpansionInfo& e_info);
};


} // namespace probfd::algorithms::acyclic_vi


#define GUARD_INCLUDE_PROBFD_ALGORITHMS_ACYCLIC_VALUE_ITERATION_H


#include "probfd/algorithms/acyclic_value_iteration_impl.h"


#undef GUARD_INCLUDE_PROBFD_ALGORITHMS_ACYCLIC_VALUE_ITERATION_H


#endif // PROBFD_ALGORITHMS_ACYCLIC_VALUE_ITERATION_H

probfd::Distribution
A convenience class that represents a finite probability distribution.
Definition task_state_space.h:27

probfd::MDPAlgorithm
Interface for MDP algorithm implementations.
Definition mdp_algorithm.h:29

probfd::ProgressReport
A registry for print functions related to search progress.
Definition progress_report.h:33

probfd::algorithms::acyclic_vi::AcyclicValueIteration
Implements acyclic Value Iteration.
Definition acyclic_value_iteration.h:113

probfd::algorithms::acyclic_vi::AcyclicValueIterationObserver
Models an observer subscribed to events of the acyclic value iteration algorithm.
Definition acyclic_value_iteration.h:32

probfd::algorithms::acyclic_vi::AcyclicValueIterationObserver::on_state_selected_for_expansion
virtual void on_state_selected_for_expansion(const State &)=0
Called when the algorithm selects a state for expansion.

probfd::algorithms::acyclic_vi::AcyclicValueIterationObserver::on_pruned_state
virtual void on_pruned_state(const State &)=0
Called when a state is pruned during the expansion check.

probfd::algorithms::acyclic_vi::AcyclicValueIterationObserver::on_goal_state
virtual void on_goal_state(const State &)=0
Called when a goal state is encountered during the expansion check.

probfd::algorithms::acyclic_vi::AcyclicValueIterationObserver::on_terminal_state
virtual void on_terminal_state(const State &)=0
Called when a terminal state is encountered during the expansion check.

probfd::algorithms::acyclic_vi
Namespace dedicated to the acyclic value iteration algorithm.
Definition acyclic_value_iteration.h:22

probfd::value_t
double value_t
Typedef for the state value type.
Definition aliases.h:7

probfd::param_type
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25

probfd::Interval
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12

probfd::StateID
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22

probfd::algorithms::acyclic_vi::StatisticsObserver
An observer that collects basic statistics of the acyclic value iteration algorithm.
Definition acyclic_value_iteration.h:58

probfd::algorithms::acyclic_vi::StatisticsObserver::on_state_selected_for_expansion
void on_state_selected_for_expansion(const State &)
Called when the algorithm selects a state for expansion.
Definition acyclic_value_iteration_impl.h:17

probfd::algorithms::acyclic_vi::StatisticsObserver::on_goal_state
void on_goal_state(const State &)
Called when a goal state is encountered during the expansion check.
Definition acyclic_value_iteration_impl.h:24

probfd::algorithms::acyclic_vi::StatisticsObserver::on_pruned_state
void on_pruned_state(const State &)
Called when a state is pruned during the expansion check.
Definition acyclic_value_iteration_impl.h:36

probfd::algorithms::acyclic_vi::StatisticsObserver::on_terminal_state
void on_terminal_state(const State &)
Called when a terminal state is encountered during the expansion check.
Definition acyclic_value_iteration_impl.h:30