1#ifndef GUARD_INCLUDE_PROBFD_ALGORITHMS_LRTDP_H
2#error "This file should only be included from lrtdp.h"
5#include "probfd/algorithms/successor_sampler.h"
7#include "downward/utils/countdown_timer.h"
16inline void Statistics::print(std::ostream& out)
const
18 out <<
" Trials: " << trials << std::endl;
19 out <<
" Bellman backups (trials): " << trial_bellman_backups << std::endl;
20 out <<
" Bellman backups (check&solved): "
21 << check_and_solve_bellman_backups << std::endl;
26template <
typename State,
typename Action,
bool UseInterval>
28 std::shared_ptr<PolicyPickerType> policy_chooser,
30 std::shared_ptr<SuccessorSamplerType> succ_sampler)
31 : Base(policy_chooser)
32 , stop_consistent_(stop_consistent)
33 , sample_(succ_sampler)
37template <
typename State,
typename Action,
bool UseInterval>
40 this->state_infos_.reset();
43template <
typename State,
typename Action,
bool UseInterval>
46 EvaluatorType& heuristic,
51 utils::CountdownTimer timer(max_time);
53 const StateID state_id = mdp.get_state_id(state);
54 const StateInfo& state_info = this->state_infos_[state_id];
61 [&](std::ostream& out) { out <<
"trials=" << statistics_.trials; });
63 while (!state_info.is_solved()) {
64 trial(mdp, heuristic, state_id, timer);
65 this->statistics_.trials++;
69 return state_info.get_bounds();
72template <
typename State,
typename Action,
bool UseInterval>
74 std::ostream& out)
const
76 statistics_.print(out);
79template <
typename State,
typename Action,
bool UseInterval>
82 EvaluatorType& heuristic,
84 utils::CountdownTimer& timer)
86 assert(!this->state_infos_[initial_state].is_solved());
92 current_trial_.push_back(initial_state);
95 timer.throw_if_expired();
97 const StateID state_id = current_trial_.back();
99 auto& state_info = this->state_infos_[state_id];
101 if (state_info.is_solved()) {
102 current_trial_.pop_back();
106 const State state = mdp.get_state(state_id);
107 const value_t termination_cost =
108 mdp.get_termination_info(state).get_cost();
112 if (state_info.is_on_fringe()) {
113 this->expand_and_initialize(
120 this->generate_non_tip_transitions(mdp, state, transitions_);
123 this->statistics_.trial_bellman_backups++;
125 auto value = this->compute_bellman_and_greedy(
132 auto transition = this->select_greedy_transition(
134 state_info.get_policy(),
137 bool value_changed = this->update_value(state_info, value);
138 this->update_policy(state_info, transition);
141 state_info.mark_solved();
142 current_trial_.pop_back();
146 assert(!state_info.is_goal_or_terminal());
148 if ((stop_consistent_ ==
CONSISTENT && !value_changed) ||
150 (stop_consistent_ ==
REVISITED && state_info.is_closed())) {
155 state_info.mark_closed();
158 auto next = sample_->sample(
161 transition->successor_dist,
164 current_trial_.push_back(next);
168 for (
const StateID state :
169 current_trial_ | std::views::reverse | std::views::drop(1)) {
170 auto& info = this->state_infos_[state];
171 assert(info.is_closed());
172 info.unmark_closed();
177 timer.throw_if_expired();
179 if (!check_and_solve(mdp, heuristic, current_trial_.back(), timer)) {
183 current_trial_.pop_back();
184 }
while (!current_trial_.empty());
187template <
typename State,
typename Action,
bool UseInterval>
188bool LRTDP<State, Action, UseInterval>::check_and_solve(
190 EvaluatorType& heuristic,
191 StateID init_state_id,
192 utils::CountdownTimer& timer)
194 assert(!current_trial_.empty() && policy_queue_.empty());
196 ClearGuard guard(visited_);
199 StateInfo& state_info = this->state_infos_[init_state_id];
200 if (state_info.is_solved())
return true;
201 policy_queue_.emplace_back(init_state_id);
202 state_info.mark_closed();
208 timer.throw_if_expired();
210 const auto state_id = policy_queue_.back();
211 policy_queue_.pop_back();
213 auto& info = this->state_infos_[state_id];
214 assert(!info.is_solved());
215 assert(info.is_closed());
217 visited_.push_front(state_id);
219 const State state = mdp.get_state(state_id);
220 const value_t termination_cost =
221 mdp.get_termination_info(state).get_cost();
223 ClearGuard _(transitions_, qvalues_);
225 if (info.is_on_fringe()) {
226 this->expand_and_initialize(
233 this->generate_non_tip_transitions(mdp, state, transitions_);
236 this->statistics_.check_and_solve_bellman_backups++;
238 auto value = this->compute_bellman_and_greedy(
245 auto transition = this->select_greedy_transition(
250 bool value_changed = this->update_value(info, value);
251 this->update_policy(info, transition);
253 if constexpr (UseInterval) {
254 if (!info.bounds_agree()) {
270 for (StateID succ_id : transition->successor_dist.support()) {
271 StateInfo& succ_info = this->state_infos_[succ_id];
272 if (!succ_info.is_closed() && !succ_info.is_solved()) {
273 succ_info.mark_closed();
274 policy_queue_.emplace_back(succ_id);
277 }
while (!policy_queue_.empty());
279 for (StateID sid : visited_) {
280 StateInfo& info = this->state_infos_[sid];
282 if (info.is_solved())
continue;
284 assert(info.is_closed());
285 info.unmark_closed();
290 assert(!info.is_on_fringe());
292 const State state = mdp.get_state(sid);
293 const value_t termination_cost =
294 mdp.get_termination_info(state).get_cost();
296 ClearGuard _(transitions_, qvalues_);
297 this->generate_non_tip_transitions(mdp, state, transitions_);
299 statistics_.check_and_solve_bellman_backups++;
301 auto value = this->compute_bellman_and_greedy(
308 auto transition = this->select_greedy_transition(
313 this->update_value(info, value);
314 this->update_policy(info, transition);
A registry for print functions related to search progress.
Definition progress_report.h:33
void print()
Prints the output to the internal output stream, if enabled.
void register_bound(const std::string &property_name, BoundProperty property)
Appends a new bound property with a given name to the list of bound properties to be printed when the...
void register_print(Printer f)
Appends a new printer to the list of printers.
Helper RAII class that ensures that containers are cleared when going out of scope.
Definition utils.h:23
Implements the labelled real-time dynamic programming (LRTDP) algorithm bonet:geffner:icaps-03.
Definition lrtdp.h:129
LRTDP(std::shared_ptr< PolicyPickerType > policy_chooser, TrialTerminationCondition stop_consistent, std::shared_ptr< SuccessorSamplerType > succ_sampler)
Constructs an LRTDP solver object.
Definition lrtdp_impl.h:27
void print_additional_statistics(std::ostream &out) const override
Prints additional statistics to the output stream.
Definition lrtdp_impl.h:73
void reset_search_state() override
Resets the h search algorithm object to a clean state.
Definition lrtdp_impl.h:38
Namespace dedicated to labelled real-time dynamic programming (LRTDP).
Definition lrtdp.h:19
TrialTerminationCondition
Enumeration type specifying the termination condition for trials sampled during LRTDP.
Definition lrtdp.h:25
Interval as_interval(value_t lower_bound)
Returns the interval with the given lower bound and infinte upper bound.
double value_t
Typedef for the state value type.
Definition aliases.h:7
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22