kloessner/probfd/lrtdp__impl_8h_source.html

#ifndef GUARD_INCLUDE_PROBFD_ALGORITHMS_LRTDP_H

#error "This file should only be included from lrtdp.h"

#endif


#include "probfd/algorithms/successor_sampler.h"

#include "downward/utils/countdown_timer.h"

#include <cassert>
#include <ranges>
#include <utility>


namespace probfd::algorithms::lrtdp {


namespace internal {


// Writes the LRTDP counters (number of trials and the Bellman backups
// performed during trials and during check-and-solve) to `out`.
// Every line ends with std::endl, so the stream is flushed after each one.
inline void Statistics::print(std::ostream& out) const
{
    out << "  Trials: " << trials << std::endl
        << "  Bellman backups (trials): " << trial_bellman_backups << std::endl
        << "  Bellman backups (check&solved): "
        << check_and_solve_bellman_backups << std::endl;
}


} // namespace internal


// Constructs an LRTDP solver object.
//
// policy_chooser  - handed on to the base class constructor.
// stop_consistent - termination condition checked while sampling a trial.
// succ_sampler    - sampler used to pick the next state of a trial.
template <typename State, typename Action, bool UseInterval>
LRTDP<State, Action, UseInterval>::LRTDP(
    std::shared_ptr<PolicyPickerType> policy_chooser,
    TrialTerminationCondition stop_consistent,
    std::shared_ptr<SuccessorSamplerType> succ_sampler)
    // Both shared_ptr arguments are received by value, so they are moved
    // into place; copying them would cost an extra atomic reference-count
    // increment and decrement each.
    : Base(std::move(policy_chooser))
    , stop_consistent_(stop_consistent)
    , sample_(std::move(succ_sampler))
{
}


// Puts the search back into a clean state by resetting the per-state
// information store.
template <typename State, typename Action, bool UseInterval>
void LRTDP<State, Action, UseInterval>::reset_search_state()
{
    this->state_infos_.reset();
}


// Runs trials from `state` until its state info is labelled as solved and
// returns the bounds stored for it.
//
// `max_time` initialises the countdown timer that is passed to every trial.
// `progress` receives a bound named "v" (the current value of the start
// state) and a printer for the number of trials; it is printed after each
// trial.
template <typename State, typename Action, bool UseInterval>
Interval LRTDP<State, Action, UseInterval>::do_solve(
    MDPType& mdp,
    EvaluatorType& heuristic,
    param_type<State> state,
    ProgressReport& progress,
    double max_time)
{
    utils::CountdownTimer timer(max_time);

    const StateID start_id = mdp.get_state_id(state);
    const StateInfo& start_info = this->state_infos_[start_id];

    progress.register_bound("v", [&start_info]() {
        return as_interval(start_info.value);
    });

    progress.register_print(
        [&](std::ostream& out) { out << "trials=" << statistics_.trials; });

    for (;;) {
        if (start_info.is_solved()) break;

        trial(mdp, heuristic, start_id, timer);

        // Count the trial before printing so the report includes it.
        ++this->statistics_.trials;
        progress.print();
    }

    return start_info.get_bounds();
}


// Prints the LRTDP-specific statistics to `out` by delegating to the
// statistics object.
template <typename State, typename Action, bool UseInterval>
void LRTDP<State, Action, UseInterval>::print_additional_statistics(
    std::ostream& out) const
{
    this->statistics_.print(out);
}


// Samples one trial starting in `initial_state` and then runs
// check_and_solve on the states of the trial, from the last one backwards,
// until one of the checks fails or the trial is exhausted.
//
// Precondition (asserted): `initial_state` is not yet labelled as solved.
//
// `timer.throw_if_expired()` is called at the start of every iteration of
// both loops, so the function may be left through whatever that call throws.
template <typename State, typename Action, bool UseInterval>
void LRTDP<State, Action, UseInterval>::trial(
    MDPType& mdp,
    EvaluatorType& heuristic,
    StateID initial_state,
    utils::CountdownTimer& timer)
{
    assert(!this->state_infos_[initial_state].is_solved());

    using enum TrialTerminationCondition;

    // Clears the trial stack when this function is left.
    ClearGuard guard(current_trial_);

    current_trial_.push_back(initial_state);

    // Phase 1: sample the trial.
    for (;;) {
        timer.throw_if_expired();

        const StateID state_id = current_trial_.back();

        auto& state_info = this->state_infos_[state_id];

        // A solved state ends the trial and is not kept on the stack.
        if (state_info.is_solved()) {
            current_trial_.pop_back();
            break;
        }

        const State state = mdp.get_state(state_id);
        const value_t termination_cost =
            mdp.get_termination_info(state).get_cost();

        // Scratch buffers are cleared again at the end of each iteration.
        ClearGuard _(transitions_, qvalues_);

        if (state_info.is_on_fringe()) {
            this->expand_and_initialize(
                mdp,
                heuristic,
                state,
                state_info,
                transitions_);
        } else {
            this->generate_non_tip_transitions(mdp, state, transitions_);
        }

        this->statistics_.trial_bellman_backups++;

        auto value = this->compute_bellman_and_greedy(
            mdp,
            state_id,
            transitions_,
            termination_cost,
            qvalues_);

        auto transition = this->select_greedy_transition(
            mdp,
            state_info.get_policy(),
            transitions_);

        bool value_changed = this->update_value(state_info, value);
        this->update_policy(state_info, transition);

        // No greedy transition: label the state as solved right away and
        // drop it from the trial.
        if (!transition) {
            state_info.mark_solved();
            current_trial_.pop_back();
            break;
        }

        assert(!state_info.is_goal_or_terminal());

        // Configured trial termination condition; the current state stays
        // on the stack in this case.
        if ((stop_consistent_ == CONSISTENT && !value_changed) ||
            (stop_consistent_ == INCONSISTENT && value_changed) ||
            (stop_consistent_ == REVISITED && state_info.is_closed())) {
            break;
        }

        // In REVISITED mode the closed flag records which states are
        // already part of the current trial.
        if (stop_consistent_ == REVISITED) {
            state_info.mark_closed();
        }

        auto next = sample_->sample(
            state_id,
            transition->action,
            transition->successor_dist,
            this->state_infos_);

        current_trial_.push_back(next);
    }

    // Remove the closed flags set above for every state on the stack except
    // the last one.
    if (stop_consistent_ == REVISITED) {
        for (const StateID state :
             current_trial_ | std::views::reverse | std::views::drop(1)) {
            auto& info = this->state_infos_[state];
            assert(info.is_closed());
            info.unmark_closed();
        }
    }

    // Phase 2: check the trial states backwards. The stack can be empty here
    // (the initial state itself was popped above because it has no greedy
    // transition), so emptiness must be tested BEFORE the first call to
    // back(); calling back() on an empty container is undefined behaviour.
    while (!current_trial_.empty()) {
        timer.throw_if_expired();

        if (!check_and_solve(mdp, heuristic, current_trial_.back(), timer)) {
            break;
        }

        current_trial_.pop_back();
    }
}


// Explores the states reachable from `init_state_id` via the greedy
// transitions (not descending into solved states), performing one Bellman
// backup per visited state.
//
// Returns true if the start state was already solved or if no visited state
// turned out inconsistent; in the latter case every visited state is
// labelled as solved. Otherwise returns false after backing up every visited
// state that is still unsolved once more.
//
// "Inconsistent" means: with UseInterval, the bounds of the state do not
// agree after the backup; without it, the backup changed the state's value.
//
// Preconditions (asserted): the current trial is non-empty and the policy
// queue is empty.
template <typename State, typename Action, bool UseInterval>
bool LRTDP<State, Action, UseInterval>::check_and_solve(
    MDPType& mdp,
    EvaluatorType& heuristic,
    StateID init_state_id,
    utils::CountdownTimer& timer)
{
    assert(!current_trial_.empty() && policy_queue_.empty());

    // The list of visited states is cleared when this function is left.
    ClearGuard visited_guard(visited_);

    {
        StateInfo& root_info = this->state_infos_[init_state_id];
        if (root_info.is_solved()) return true;

        policy_queue_.emplace_back(init_state_id);
        root_info.mark_closed();
    }

    bool all_consistent = true;

    // The queue holds at least the start state at this point.
    while (!policy_queue_.empty()) {
        timer.throw_if_expired();

        const auto cur_id = policy_queue_.back();
        policy_queue_.pop_back();

        auto& cur_info = this->state_infos_[cur_id];
        assert(!cur_info.is_solved());
        assert(cur_info.is_closed());

        visited_.push_front(cur_id);

        const State cur_state = mdp.get_state(cur_id);
        const value_t term_cost =
            mdp.get_termination_info(cur_state).get_cost();

        ClearGuard scratch_guard(transitions_, qvalues_);

        if (cur_info.is_on_fringe()) {
            this->expand_and_initialize(
                mdp,
                heuristic,
                cur_state,
                cur_info,
                transitions_);
        } else {
            this->generate_non_tip_transitions(mdp, cur_state, transitions_);
        }

        this->statistics_.check_and_solve_bellman_backups++;

        auto backed_up = this->compute_bellman_and_greedy(
            mdp,
            cur_id,
            transitions_,
            term_cost,
            qvalues_);

        auto greedy = this->select_greedy_transition(
            mdp,
            cur_info.get_policy(),
            transitions_);

        bool value_changed = this->update_value(cur_info, backed_up);
        this->update_policy(cur_info, greedy);

        bool inconsistent;
        if constexpr (UseInterval) {
            inconsistent = !cur_info.bounds_agree();
        } else {
            inconsistent = value_changed;
        }

        // An inconsistent state is not expanded further, but the states
        // already queued are still processed.
        if (inconsistent) {
            all_consistent = false;
            continue;
        }

        // Consistent and without a greedy transition: solved on its own.
        if (!greedy) {
            cur_info.mark_solved();
            continue;
        }

        // Queue every successor of the greedy transition that is neither
        // closed nor solved.
        for (StateID succ_id : greedy->successor_dist.support()) {
            StateInfo& succ_info = this->state_infos_[succ_id];
            if (succ_info.is_closed() || succ_info.is_solved()) continue;

            succ_info.mark_closed();
            policy_queue_.emplace_back(succ_id);
        }
    }

    for (StateID sid : visited_) {
        StateInfo& info = this->state_infos_[sid];

        if (info.is_solved()) continue;

        assert(info.is_closed());
        info.unmark_closed();

        if (all_consistent) {
            info.mark_solved();
            continue;
        }

        // The check failed: back up the state again.
        assert(!info.is_on_fringe());

        const State state = mdp.get_state(sid);
        const value_t term_cost = mdp.get_termination_info(state).get_cost();

        ClearGuard scratch_guard(transitions_, qvalues_);
        this->generate_non_tip_transitions(mdp, state, transitions_);

        statistics_.check_and_solve_bellman_backups++;

        auto backed_up = this->compute_bellman_and_greedy(
            mdp,
            sid,
            transitions_,
            term_cost,
            qvalues_);

        auto greedy = this->select_greedy_transition(
            mdp,
            info.get_policy(),
            transitions_);

        this->update_value(info, backed_up);
        this->update_policy(info, greedy);
    }

    return all_consistent;
}


} // namespace probfd::algorithms::lrtdp

probfd::ProgressReport
A registry for print functions related to search progress.
Definition progress_report.h:33

probfd::ProgressReport::print
void print()
Prints the output to the internal output stream, if enabled.

probfd::ProgressReport::register_bound
void register_bound(const std::string &property_name, BoundProperty property)
Appends a new bound property with a given name to the list of bound properties to be printed when the...

probfd::ProgressReport::register_print
void register_print(Printer f)
Appends a new printer to the list of printers.

probfd::algorithms::ClearGuard
Helper RAII class that ensures that containers are cleared when going out of scope.
Definition utils.h:23

probfd::algorithms::lrtdp::LRTDP
Implements the labelled real-time dynamic programming (LRTDP) algorithm (Bonet and Geffner, ICAPS 2003).
Definition lrtdp.h:129

probfd::algorithms::lrtdp::LRTDP::LRTDP
LRTDP(std::shared_ptr< PolicyPickerType > policy_chooser, TrialTerminationCondition stop_consistent, std::shared_ptr< SuccessorSamplerType > succ_sampler)
Constructs an LRTDP solver object.
Definition lrtdp_impl.h:27

probfd::algorithms::lrtdp::LRTDP::print_additional_statistics
void print_additional_statistics(std::ostream &out) const override
Prints additional statistics to the output stream.
Definition lrtdp_impl.h:73

probfd::algorithms::lrtdp::LRTDP::reset_search_state
void reset_search_state() override
Resets the heuristic search algorithm object to a clean state.
Definition lrtdp_impl.h:38

probfd::algorithms::lrtdp
Namespace dedicated to labelled real-time dynamic programming (LRTDP).
Definition lrtdp.h:19

probfd::algorithms::lrtdp::TrialTerminationCondition
TrialTerminationCondition
Enumeration type specifying the termination condition for trials sampled during LRTDP.
Definition lrtdp.h:25

probfd::algorithms::lrtdp::TrialTerminationCondition::INCONSISTENT
@ INCONSISTENT

probfd::algorithms::lrtdp::TrialTerminationCondition::REVISITED
@ REVISITED

probfd::algorithms::lrtdp::TrialTerminationCondition::CONSISTENT
@ CONSISTENT

probfd::algorithms::as_interval
Interval as_interval(value_t lower_bound)
Returns the interval with the given lower bound and infinite upper bound.

probfd::value_t
double value_t
Typedef for the state value type.
Definition aliases.h:7

probfd::param_type
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25

probfd::Interval
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12

probfd::StateID
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22