kloessner/probfd/topological__value__iteration_8h_source.html

#ifndef PROBFD_ALGORITHMS_TOPOLOGICAL_VALUE_ITERATION_H

#define PROBFD_ALGORITHMS_TOPOLOGICAL_VALUE_ITERATION_H


#include "probfd/algorithms/types.h"

#include "probfd/storage/per_state_storage.h"

#include "probfd/distribution.h"
#include "probfd/mdp_algorithm.h"

#include <cstdint>
#include <deque>
#include <limits>
#include <memory>
#include <optional>
#include <ostream>
#include <vector>


// Forward Declarations

namespace utils {
// Only used by reference in this header, so a forward declaration suffices.
class CountdownTimer;
} // namespace utils


namespace probfd::policies {
// Forward declaration; this header only ever handles MapPolicy by pointer.
template <typename State, typename Action>
class MapPolicy;
} // namespace probfd::policies


namespace probfd::algorithms::topological_vi {


/**
 * @brief Topological value iteration statistics.
 *
 * A plain aggregate of counters, all starting at zero. The counters are
 * updated by the algorithm implementation, which is not part of this header;
 * the per-field notes below are inferred from the field names only.
 */
struct Statistics {
    unsigned long long expanded_states{0}; // presumably: states expanded
    unsigned long long terminal_states{0}; // presumably: terminal states seen
    unsigned long long goal_states{0};     // presumably: goal states seen
    unsigned long long sccs{0};            // presumably: SCCs processed
    unsigned long long singleton_sccs{0};  // presumably: SCCs of size one
    unsigned long long bellman_backups{0}; // presumably: value updates done
    unsigned long long pruned{0};          // presumably: states pruned

    /// Outputs the statistics to @p out (defined outside of this header).
    void print(std::ostream& out) const;
};


/**
 * @brief Implements Topological Value Iteration.
 *
 * The class keeps an exploration stack and a stack of per-state value update
 * records (see ExplorationInfo and StackInfo below). The member function
 * definitions live in the accompanying `_impl.h` header; the notes in this
 * declaration only describe what the declarations themselves show.
 *
 * @tparam State - The state type of the underlying MDP.
 * @tparam Action - The action type of the underlying MDP.
 * @tparam UseInterval - Selects the value representation through
 * algorithms::AlgorithmValue<UseInterval>.
 */
template <typename State, typename Action, bool UseInterval = false>
class TopologicalValueIteration : public MDPAlgorithm<State, Action> {
    // Names the base class through its injected-class-name.
    using Base = typename TopologicalValueIteration::MDPAlgorithm;

    using PolicyType = typename Base::PolicyType;
    using MDPType = typename Base::MDPType;
    using EvaluatorType = typename Base::EvaluatorType;

    using MapPolicy = policies::MapPolicy<State, Action>;
    using AlgorithmValueType = algorithms::AlgorithmValue<UseInterval>;

    // Per-state bookkeeping stored in state_information_.
    struct StateInfo {
        // Status Flags
        enum { NEW, CLOSED, ONSTACK };

        // NOTE(review): presumably the index of the state's entry in stack_
        // while the state is on the stack -- confirm in the implementation.
        unsigned stack_id = 0;

        // One of the status flags above; stored in a single byte.
        std::uint8_t status = NEW;
    };

    // Bookkeeping for the Q-value of a single action of a state on the stack.
    struct QValueInfo {
        // The action id this Q value belongs to.
        Action action;

        // Precomputed part of the Q-value.
        // Sum of action cost plus those weighted successor values which
        // have already converged due to topological ordering.
        AlgorithmValueType conv_part;

        // Pointers to successor values which have not yet converged,
        // self-loops excluded.
        std::vector<ItemProbabilityPair<AlgorithmValueType*>> nconv_successors;

        QValueInfo(Action action, value_t action_cost);

        // NOTE(review): presumably accounts for the self-loop probability of
        // the transition once all successors were seen; the meaning of the
        // returned flag is defined in the implementation header.
        bool finalize_transition(value_t self_loop_prob);

        // Returns the Q-value as an AlgorithmValueType; presumably computed
        // from conv_part and nconv_successors -- confirm in the
        // implementation.
        AlgorithmValueType compute_q_value() const;
    };

    // Value update record of a state on the stack.
    struct StackInfo {
        StateID state_id;

        // Reference to the state value of the state.
        AlgorithmValueType* value;

        // Precomputed part of the max of the value update.
        // Minimum over all Q values of actions leaving the SCC.
        AlgorithmValueType conv_part;

        // Remaining Q values which have not yet converged.
        std::vector<QValueInfo> nconv_qs;

        // The optimal action
        std::optional<Action> best_action = std::nullopt;

        // The optimal action among those leaving the SCC.
        std::optional<Action> best_converged = std::nullopt;

        StackInfo(StateID state_id, AlgorithmValueType& value_ref);

        // NOTE(review): presumably performs one value update of *value and
        // reports whether it changed -- confirm in the implementation.
        bool update_value();
    };

    // Exploration record of a state on the exploration stack.
    struct ExplorationInfo {
        // Exploration State
        std::vector<Action> aops;         // Remaining unexpanded operators
        Distribution<StateID> transition; // Currently expanded transition
        Distribution<StateID>::const_iterator successor; // Current successor

        // Immutable info
        StateID state_id; // State this information belongs to
        StackInfo& stack_info;
        unsigned stackidx; // Index on the stack of the associated state

        // NOTE(review): presumably the Tarjan-style low-link value of this
        // state -- confirm in the implementation.
        unsigned lowlink;

        // NOTE(review): presumably the self-loop probability mass of the
        // currently expanded transition -- confirm in the implementation.
        value_t self_loop_prob = 0_vt;

        ExplorationInfo(
            StateID state_id,
            StackInfo& stack_info,
            unsigned stackidx);

        void update_lowlink(unsigned upd);

        // NOTE(review): the two functions below presumably advance to the
        // next operator / next successor and report whether one exists.
        bool next_transition(MDPType& mdp);
        bool next_successor();

        // NOTE(review): presumably like the two functions above, but skipping
        // self-loops -- confirm in the implementation.
        bool forward_non_loop_transition(MDPType& mdp, const State& state);
        bool forward_non_loop_successor();

        Action& get_current_action();
        ItemProbabilityPair<StateID> get_current_successor();
    };

    using StackIterator = typename std::vector<StackInfo>::iterator;

    // Algorithm parameters
    const bool expand_goals_;

    // Algorithm state
    storage::PerStateStorage<StateInfo> state_information_;
    std::deque<ExplorationInfo> exploration_stack_;
    std::vector<StackInfo> stack_;

    Statistics statistics_;

public:
    /**
     * @brief Constructs the algorithm object.
     *
     * @param expand_goals - Stored in expand_goals_. NOTE(review): presumably
     * controls whether goal states are expanded -- confirm in the
     * implementation.
     */
    explicit TopologicalValueIteration(bool expand_goals);

    /// Overrides the policy computation entry point of the base class.
    std::unique_ptr<PolicyType> compute_policy(
        MDPType& mdp,
        EvaluatorType& heuristic,
        param_type<State> state,
        ProgressReport,
        double max_time) override;

    /// Overrides the solve entry point of the base class.
    Interval solve(
        MDPType& mdp,
        EvaluatorType& heuristic,
        param_type<State> state,
        ProgressReport,
        double max_time) override;

    /// Prints algorithm statistics to the specified output stream.
    void print_statistics(std::ostream& out) const override;

    /// Retrieve the algorithm statistics.
    [[nodiscard]]
    Statistics get_statistics() const;

    /**
     * @brief Overload of solve() that takes the initial state by id and an
     * external value store.
     *
     * @param mdp - The MDP.
     * @param heuristic - The evaluator passed on to state initialization.
     * @param init_state_id - The id of the initial state.
     * @param value_store - The store of state values; its type is a template
     * parameter.
     * @param max_time - The time limit; defaults to infinity.
     * @param policy - Optional output policy; defaults to nullptr.
     * NOTE(review): presumably filled with the chosen actions when non-null
     * -- confirm in the implementation.
     */
    template <typename ValueStore>
    Interval solve(
        MDPType& mdp,
        EvaluatorType& heuristic,
        StateID init_state_id,
        ValueStore& value_store,
        double max_time = std::numeric_limits<double>::infinity(),
        MapPolicy* policy = nullptr);

private:
    // The helpers below are defined in the implementation header.

    void push_state(
        StateID state_id,
        StateInfo& state_info,
        AlgorithmValueType& state_value);

    bool initialize_state(
        MDPType& mdp,
        EvaluatorType& heuristic,
        ExplorationInfo& exp_info,
        auto& value_store);

    template <typename ValueStore>
    bool successor_loop(
        MDPType& mdp,
        ExplorationInfo& explore,
        ValueStore& value_store,
        utils::CountdownTimer& timer);

    void scc_found(auto scc, MapPolicy* policy, utils::CountdownTimer& timer);
};


} // namespace probfd::algorithms::topological_vi


// Pull in the template member definitions. The guard macro is defined only
// for the duration of this include and removed again right after it.
// NOTE(review): presumably the _impl.h header checks this macro to reject
// direct inclusion -- confirm in that header.
#define GUARD_INCLUDE_PROBFD_ALGORITHMS_TOPOLOGICAL_VALUE_ITERATION_H

#include "probfd/algorithms/topological_value_iteration_impl.h"

#undef GUARD_INCLUDE_PROBFD_ALGORITHMS_TOPOLOGICAL_VALUE_ITERATION_H


#endif // PROBFD_ALGORITHMS_TOPOLOGICAL_VALUE_ITERATION_H

probfd::Distribution
A convenience class that represents a finite probability distribution.
Definition task_state_space.h:27

probfd::ItemProbabilityPair
An item-probability pair.
Definition distribution.h:20

probfd::MDPAlgorithm
Interface for MDP algorithm implementations.
Definition mdp_algorithm.h:29

probfd::ProgressReport
A registry for print functions related to search progress.
Definition progress_report.h:33

probfd::algorithms::topological_vi::TopologicalValueIteration
Implements Topological Value Iteration [dai:etal:jair-11].
Definition topological_value_iteration.h:68

probfd::algorithms::topological_vi::TopologicalValueIteration::get_statistics
Statistics get_statistics() const
Retrieve the algorithm statistics.
Definition topological_value_iteration_impl.h:249

probfd::algorithms::topological_vi::TopologicalValueIteration::print_statistics
void print_statistics(std::ostream &out) const override
Prints algorithm statistics to the specified output stream.
Definition topological_value_iteration_impl.h:241

probfd::algorithms::topological_vi
Namespace dedicated to Topological Value Iteration (TVI).
Definition topological_value_iteration.h:27

probfd::algorithms::AlgorithmValue
std::conditional_t< UseInterval, Interval, value_t > AlgorithmValue
Convenience value type alias for algorithms selecting interval iteration behaviour based on a template parameter.
Definition types.h:14

probfd::value_t
double value_t
Typedef for the state value type.
Definition aliases.h:7

probfd::param_type
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25

probfd::Interval
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12

probfd::StateID
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22

probfd::algorithms::topological_vi::Statistics
Topological value iteration statistics.
Definition topological_value_iteration.h:32