AI 24/25 Project Software
Documentation for the AI 24/25 course programming project software
Loading...
Searching...
No Matches
lrtdp.h
1#ifndef PROBFD_ALGORITHMS_LRTDP_H
2#define PROBFD_ALGORITHMS_LRTDP_H
3
4#include "probfd/algorithms/heuristic_search_base.h"
5
6#include <deque>
7
8// Forward Declarations
9namespace utils {
10class CountdownTimer;
11}
12
13namespace probfd::algorithms {
14template <typename>
15class SuccessorSampler;
16}
17
20
45
46namespace internal {
47
48struct Statistics {
49 unsigned long long trials = 0;
50 unsigned long long trial_bellman_backups = 0;
51 unsigned long long check_and_solve_bellman_backups = 0;
52
53 void print(std::ostream& out) const;
54};
55
56template <typename Action, bool UseInterval>
57struct PerStateInformation
58 : public heuristic_search::
59 PerStateBaseInformation<Action, true, UseInterval> {
60private:
61 using Base = typename heuristic_search::PerStateBaseInformation<Action, true, UseInterval>;
62
63public:
64 static constexpr uint8_t VISITED = 0b01 << Base::BITS;
65 static constexpr uint8_t SOLVED = 0b10 << Base::BITS;
66 static constexpr uint8_t BITS = Base::BITS + 2;
67 static constexpr uint8_t MASK = 0b11 << Base::BITS;
68
69 bool is_solved() const
70 {
71 return this->info & SOLVED || this->is_goal_or_terminal();
72 }
73
74 void mark_solved() { this->info |= SOLVED; }
75
76 bool is_closed() const { return (this->info & VISITED) != 0; }
77
78 void mark_closed()
79 {
80 assert(!is_solved());
81 this->info |= VISITED;
82 }
83
84 void unmark_closed()
85 {
86 assert(!is_solved());
87 this->info &= ~VISITED;
88 }
89
90 void clear() { this->info &= ~MASK; }
91};
92
93} // namespace internal
94
124template <typename State, typename Action, bool UseInterval>
125class LRTDP
127 State,
128 Action,
129 internal::PerStateInformation<Action, UseInterval>> {
130 using Base = typename LRTDP::FRETHeuristicSearchAlgorithm;
131
132 using AlgorithmValueType = Base::AlgorithmValueType;
133
134public:
135 using StateInfo = typename Base::StateInfo;
136
137private:
138 using MDPType = typename Base::MDPType;
139 using EvaluatorType = typename Base::EvaluatorType;
140 using PolicyPickerType = typename Base::PolicyPicker;
141
143
144 using Statistics = internal::Statistics;
145
146 // Algorithm parameters
147 const TrialTerminationCondition stop_consistent_;
148 const std::shared_ptr<SuccessorSamplerType> sample_;
149
150 // Algorithm state
151 std::vector<StateID> current_trial_;
152 std::vector<StateID> policy_queue_;
153 std::deque<StateID> visited_;
154
155 Statistics statistics_;
156
157 // Re-used buffer
158 std::vector<Transition<Action>> transitions_;
159 std::vector<AlgorithmValueType> qvalues_;
160
161public:
165 LRTDP(
166 std::shared_ptr<PolicyPickerType> policy_chooser,
167 TrialTerminationCondition stop_consistent,
168 std::shared_ptr<SuccessorSamplerType> succ_sampler);
169
170 void reset_search_state() override;
171
172protected:
173 Interval do_solve(
174 MDPType& mdp,
175 EvaluatorType& heuristic,
176 param_type<State> state,
177 ProgressReport& progress,
178 double max_time) override;
179
180 void print_additional_statistics(std::ostream& out) const override;
181
182private:
183 void trial(
184 MDPType& mdp,
185 EvaluatorType& heuristic,
186 StateID initial_state,
187 utils::CountdownTimer& timer);
188
189 bool check_and_solve(
190 MDPType& mdp,
191 EvaluatorType& heuristic,
192 StateID init_state_id,
193 utils::CountdownTimer& timer);
194};
195
196} // namespace probfd::algorithms::lrtdp
197
198#define GUARD_INCLUDE_PROBFD_ALGORITHMS_LRTDP_H
199#include "probfd/algorithms/lrtdp_impl.h"
200#undef GUARD_INCLUDE_PROBFD_ALGORITHMS_LRTDP_H
201
202#endif // PROBFD_ALGORITHMS_LRTDP_H
A registry for print functions related to search progress.
Definition progress_report.h:33
Heuristics search algorithm that can be used within FRET.
Definition heuristic_search_base.h:368
Implements the labelled real-time dynamic programming (LRTDP) algorithm bonet:geffner:icaps-03.
Definition lrtdp.h:129
LRTDP(std::shared_ptr< PolicyPickerType > policy_chooser, TrialTerminationCondition stop_consistent, std::shared_ptr< SuccessorSamplerType > succ_sampler)
Constructs an LRTDP solver object.
Definition lrtdp_impl.h:27
void print_additional_statistics(std::ostream &out) const override
Prints additional statistics to the output stream.
Definition lrtdp_impl.h:73
void reset_search_state() override
Resets the h search algorithm object to a clean state.
Definition lrtdp_impl.h:38
Namespace dedicated to labelled real-time dynamic programming (LRTDP).
Definition lrtdp.h:19
TrialTerminationCondition
Enumeration type specifying the termination condition for trials sampled during LRTDP.
Definition lrtdp.h:25
This namespace contains implementations of SSP search algorithms.
Definition acyclic_value_iteration.h:22
typename std::conditional_t< is_cheap_to_copy_v< T >, T, const T & > param_type
Alias template defining the best way to pass a parameter of a given type.
Definition type_traits.h:25
Represents a closed interval over the extended reals as a pair of lower and upper bound.
Definition interval.h:12
A StateID represents a state within a StateIDMap. Just like Fast Downward's StateID type,...
Definition types.h:22