1#ifndef PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H
2#define PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H
4#include "probfd/algorithms/heuristic_search_state_information.h"
5#include "probfd/algorithms/types.h"
7#include "probfd/mdp_algorithm.h"
8#include "probfd/progress_report.h"
10#if defined(EXPENSIVE_STATISTICS)
11#include "downward/utils/timer.h"
26template <
typename,
typename>
31template <
typename,
typename>
34class SuccessorSampler;
38template <
typename,
typename,
typename,
typename>
40template <
typename,
typename,
typename>
42template <
typename,
typename,
typename>
55 unsigned long long evaluated_states = 0;
56 unsigned long long pruned_states = 0;
57 unsigned long long goal_states = 0;
59 unsigned long long expanded_states = 0;
60 unsigned long long terminal_states = 0;
61 unsigned long long self_loop_states = 0;
63 unsigned long long value_changes = 0;
64 unsigned long long policy_changes = 0;
65 unsigned long long value_updates = 0;
66 unsigned long long policy_updates = 0;
68 value_t initial_state_estimate = 0;
69 bool initial_state_found_terminal =
false;
71#if defined(EXPENSIVE_STATISTICS)
72 utils::Timer update_time = utils::Timer(
true);
73 utils::Timer policy_selection_time = utils::Timer(
true);
79 void print(std::ostream& out)
const;
82template <
typename StateInfo>
84 storage::PerStateStorage<StateInfo> state_infos_;
87 StateInfo& operator[](
StateID sid) {
return state_infos_[sid]; }
88 const StateInfo& operator[](
StateID sid)
const {
return state_infos_[sid]; }
92 return state_infos_[state_id].get_value();
97 return state_infos_[state_id].get_bounds();
100 void reset() { std::ranges::for_each(state_infos_, &StateInfo::clear); }
112template <
typename State,
typename Action,
typename StateInfoT>
114 template <
bool b,
typename T>
115 using const_if = std::conditional_t<b, const T, T>;
121 using TransitionType = Transition<Action>;
126 template <
typename,
typename,
typename,
typename>
129 template <
typename,
typename,
typename>
130 friend class fret::PolicyGraph;
132 template <
typename,
typename,
typename>
133 friend class fret::ValueGraph;
136 using StateInfo = StateInfoT;
138 static constexpr bool StorePolicy = StateInfo::StorePolicy;
139 static constexpr bool UseInterval = StateInfo::UseInterval;
145 const std::shared_ptr<PolicyPickerType> policy_chooser_;
149 internal::StateInfos<StateInfo> state_infos_;
153 struct BellmanResult {
154 AlgorithmValueType best_value;
155 std::optional<TransitionType> transition;
160 std::shared_ptr<PolicyPickerType> policy_chooser);
181 const std::vector<TransitionType>& transitions,
182 value_t termination_cost)
const;
203 std::vector<TransitionType>& transitions,
205 std::vector<AlgorithmValueType>& qvalues,
220 std::optional<Action> previous_greedy_action,
221 std::vector<TransitionType>& greedy_transitions);
230 StateInfo& state_info,
231 AlgorithmValueType other,
241 StateInfo& state_info,
242 const std::optional<TransitionType>& transition)
243 requires(StorePolicy);
246 void initialize_initial_state(
251 void expand_and_initialize(
255 StateInfo& state_info,
256 std::vector<TransitionType>& transitions);
258 void generate_non_tip_transitions(
261 std::vector<TransitionType>& transitions)
const;
263 void print_statistics(std::ostream& out)
const;
270 StateInfo& state_info);
272 AlgorithmValueType compute_qvalue(
275 const TransitionType& transition)
const;
277 AlgorithmValueType compute_q_values(
280 std::vector<TransitionType>& transitions,
282 std::vector<AlgorithmValueType>& qvalues)
const;
284 AlgorithmValueType filter_greedy_transitions(
285 std::vector<TransitionType>& transitions,
286 std::vector<AlgorithmValueType>& qvalues,
287 const AlgorithmValueType& best_value,
299template <
typename State,
typename Action,
typename StateInfoT>
303 using AlgorithmBase =
typename HeuristicSearchAlgorithm::MDPAlgorithm;
304 using HSBase =
typename HeuristicSearchAlgorithm::HeuristicSearchBase;
307 using TransitionType = HSBase::TransitionType;
308 using AlgorithmValueType = HSBase::AlgorithmValueType;
311 using PolicyType =
typename AlgorithmBase::PolicyType;
313 using MDPType =
typename AlgorithmBase::MDPType;
314 using EvaluatorType =
typename AlgorithmBase::EvaluatorType;
316 using StateInfo =
typename HSBase::StateInfo;
317 using PolicyPicker =
typename HSBase::PolicyPickerType;
321 using HSBase::HSBase;
328 double max_time)
final;
330 std::unique_ptr<PolicyType> compute_policy(
335 double max_time)
final;
349 double max_time) = 0;
366template <
typename State,
typename Action,
typename StateInfoT>
369 using AlgorithmBase =
typename FRETHeuristicSearchAlgorithm::MDPAlgorithm;
371 typename FRETHeuristicSearchAlgorithm::HeuristicSearchAlgorithm;
374 using PolicyType =
typename AlgorithmBase::PolicyType;
376 using MDPType =
typename AlgorithmBase::MDPType;
377 using EvaluatorType =
typename AlgorithmBase::EvaluatorType;
379 using StateInfo =
typename HSBase::StateInfo;
380 using PolicyPicker =
typename HSBase::PolicyPickerType;
384 using HSBase::HSBase;
397#define GUARD_INCLUDE_PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H
398#include "probfd/algorithms/heuristic_search_base_impl.h"
399#undef GUARD_INCLUDE_PROBFD_ALGORITHMS_HEURISTIC_SEARCH_BASE_H
Interface for MDP algorithm implementations.
Definition mdp_algorithm.h:29
A registry for print functions related to search progress.
Definition progress_report.h:33
An strategy interface used to choose break ties between multiple greedy actions for a state.
Definition policy_picker.h:57
Interface providing access to various state properties during heuristic search.
Definition state_properties.h:22
Implemetation of the Find-Revise-Eliminate-Traps (FRET) framework kolobov:etal:icaps-11 .
Definition heuristic_search_base.h:39
Heuristics search algorithm that can be used within FRET.
Definition heuristic_search_base.h:368
virtual void reset_search_state()
Resets the h search algorithm object to a clean state.
Definition heuristic_search_base.h:392
Extends HeuristicSearchBase with default implementations for MDPAlgorithm.
Definition heuristic_search_base.h:302
virtual void print_additional_statistics(std::ostream &out) const =0
Prints additional statistics to the output stream.
void print_statistics(std::ostream &out) const final
Prints algorithm statistics to the specified output stream.
Definition heuristic_search_base_impl.h:484
virtual Interval do_solve(MDPType &mdp, EvaluatorType &h, param_type< State > state, ProgressReport &progress, double max_time)=0
Solves for the optimal state value of the input state.
The common base class for MDP h search algorithms.
Definition heuristic_search_base.h:113
bool update_value(StateInfo &state_info, AlgorithmValueType other, value_t epsilon=g_epsilon)
Updates the value of the state associated with the given storage.
Definition heuristic_search_base_impl.h:153
AlgorithmValueType compute_bellman(CostFunctionType &cost_function, StateID state_id, const std::vector< TransitionType > &transitions, value_t termination_cost) const
Computes the Bellman operator value for a state.
Definition heuristic_search_base_impl.h:74
Interval lookup_bounds(StateID state_id) const
Looks up the current value interval of state_id.
Definition heuristic_search_base_impl.h:60
bool update_policy(StateInfo &state_info, const std::optional< TransitionType > &transition)
Updates the current greedy action of the state associated with the given storage.
Definition heuristic_search_base_impl.h:165
std::optional< TransitionType > select_greedy_transition(MDPType &mdp, std::optional< Action > previous_greedy_action, std::vector< TransitionType > &greedy_transitions)
Selects a greedy transition from the given list of greedy transitions through the policy selector pas...
Definition heuristic_search_base_impl.h:130
bool was_visited(StateID state_id) const
Checks if the state represented by state_id has been visited yet.
Definition heuristic_search_base_impl.h:67
AlgorithmValueType compute_bellman_and_greedy(CostFunctionType &cost_function, StateID state_id, std::vector< TransitionType > &transitions, value_t termination_cost, std::vector< AlgorithmValueType > &qvalues, value_t epsilon=g_epsilon) const
Computes the Bellman operator value for a state, as well as all transitions achieving a value epsilon...
Definition heuristic_search_base_impl.h:95
Namespace dedicated to the Find, Revise, Eliminate Traps (FRET) framework.
Definition fret.h:23
Namespace dedicated to the MDP h search base implementation.
Definition heuristic_search_base.h:47
This namespace contains implementations of SSP search algorithms.
Definition acyclic_value_iteration.h:22
std::conditional_t< UseInterval, Interval, value_t > AlgorithmValue
Convenience value type alias for algorithms selecting interval iteration behaviour based on a templat...
Definition types.h:14
The top-level namespace of probabilistic Fast Downward.
Definition command_line.h:8
double value_t
Typedef for the state value type.
Definition aliases.h:7
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25
value_t g_epsilon
The default tolerance value for approximate comparisons.
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22
Base statistics for MDP h search.
Definition heuristic_search_base.h:54
void print(std::ostream &out) const
Prints the statistics to the specified output stream.
Definition heuristic_search_base_impl.h:26