28enum class BacktrackingUpdateType { DISABLED, ON_DEMAND, SINGLE };
33 utils::Timer trap_timer = utils::Timer(
true);
34 unsigned long long iterations = 0;
35 unsigned long long traps = 0;
36 unsigned long long reexpansions = 0;
37 unsigned long long fw_updates = 0;
38 unsigned long long bw_updates = 0;
40 void print(std::ostream& out)
const;
44template <
typename Action,
bool UseInterval>
45struct PerStateInformation
46 :
public heuristic_search::
47 PerStateBaseInformation<Action, true, UseInterval> {
49 using Base = heuristic_search::PerStateBaseInformation<Action, true, UseInterval>;
52 static constexpr uint8_t SOLVED = 1 << Base::BITS;
53 static constexpr uint8_t BITS = Base::BITS + 1;
54 static constexpr uint8_t MASK = 1 << Base::BITS;
57 bool is_solved()
const
59 return this->info & SOLVED || this->is_goal_or_terminal();
62 void set_solved() { this->info = (this->info & ~MASK) | SOLVED; }
67template <
typename,
typename,
bool>
68class TADepthFirstHeuristicSearch;
70template <
typename State,
typename Action,
bool UseInterval>
73 quotients::QuotientState<State, Action>,
74 quotients::QuotientAction<Action>,
75 internal::PerStateInformation<
76 quotients::QuotientAction<Action>,
78 using Base =
typename TADFHSImpl::HeuristicSearchBase;
80 using AlgorithmValueType = Base::AlgorithmValueType;
82 using QuotientSystem = quotients::QuotientSystem<State, Action>;
83 using QState = quotients::QuotientState<State, Action>;
84 using QAction = quotients::QuotientAction<Action>;
86 using QEvaluator =
typename Base::EvaluatorType;
87 using QuotientPolicyPicker =
typename Base::PolicyPickerType;
88 using StateInfo =
typename Base::StateInfo;
92 template <
typename,
typename,
bool>
93 friend class TADepthFirstHeuristicSearch;
100 struct ExplorationInformation {
103 std::vector<StateID> successors;
106 bool value_converged : 1 =
true;
108 bool all_solved : 1 =
true;
110 bool is_trap : 1 =
true;
112 explicit ExplorationInformation(
StateID state,
int stack_index)
114 , lowlink(stack_index)
118 bool next_successor();
121 void update(
const ExplorationInformation& other);
128 std::optional<QAction> action;
130 explicit StackInfo(
StateID state_id)
136 friend auto get(StackInfo& info)
138 if constexpr (i == 0)
return info.state_id;
139 if constexpr (i == 1)
return std::views::single(*info.action);
143 friend auto get(
const StackInfo& info)
145 if constexpr (i == 0)
return info.state_id;
146 if constexpr (i == 1)
return std::views::single(*info.action);
150 static constexpr int NEW = -1;
151 static constexpr int CLOSED = -2;
154 const bool forward_updates_;
155 const BacktrackingUpdateType backtrack_update_type_;
156 const bool cutoff_tip_;
157 const bool cutoff_inconsistent_;
158 const bool terminate_exploration_on_cutoff_;
159 const bool label_solved_;
160 const bool reexpand_traps_;
163 std::deque<ExplorationInformation> queue_;
164 std::vector<StackInfo> stack_;
165 std::vector<StateID> visited_states_;
166 storage::StateHashMap<int> stack_index_;
168 bool terminated_ =
false;
171 std::vector<Transition<QAction>> transitions_;
172 std::vector<AlgorithmValueType> qvalues_;
175 internal::Statistics statistics_;
182 std::shared_ptr<QuotientPolicyPicker> policy_chooser,
183 bool forward_updates,
184 BacktrackingUpdateType backtrack_update_type,
186 bool cutoff_inconsistent,
187 bool terminate_exploration_on_cutoff,
189 bool reexpand_traps);
192 QuotientSystem& quotient,
193 QEvaluator& heuristic,
198 void print_statistics(std::ostream& out)
const;
202 QuotientSystem& quotient,
203 QEvaluator& heuristic,
206 utils::CountdownTimer& timer);
208 void dfhs_label_driver(
209 QuotientSystem& quotient,
210 QEvaluator& heuristic,
213 utils::CountdownTimer& timer);
216 QuotientSystem& quotient,
217 ExplorationInformation& einfo,
222 bool advance(QuotientSystem& quotient, ExplorationInformation& einfo);
225 QuotientSystem& quotient,
226 ExplorationInformation& einfo,
227 utils::CountdownTimer& timer);
230 QuotientSystem& quotient,
231 QEvaluator& heuristic,
232 ExplorationInformation& einfo);
236 bool policy_exploration(
237 QuotientSystem& quotient,
238 QEvaluator& heuristic,
240 utils::CountdownTimer& timer);
242 UpdateResult value_iteration(
243 QuotientSystem& quotient,
244 const std::ranges::input_range
auto& range,
245 utils::CountdownTimer& timer);
248template <
typename State,
typename Action,
bool UseInterval>
249class TADepthFirstHeuristicSearch :
public MDPAlgorithm<State, Action> {
250 using Base =
typename TADepthFirstHeuristicSearch::MDPAlgorithm;
252 using PolicyType =
typename Base::PolicyType;
253 using MDPType =
typename Base::MDPType;
254 using EvaluatorType =
typename Base::EvaluatorType;
256 using QuotientSystem = quotients::QuotientSystem<State, Action>;
257 using QState = quotients::QuotientState<State, Action>;
258 using QAction = quotients::QuotientAction<Action>;
262 TADFHSImpl<State, Action, UseInterval> algorithm_;
268 TADepthFirstHeuristicSearch(
269 std::shared_ptr<QuotientPolicyPicker> policy_chooser,
270 bool forward_updates,
271 BacktrackingUpdateType backtrack_update_type,
273 bool cutoff_inconsistent,
274 bool stop_exploration_inconsistent,
276 bool reexpand_removed_traps);
280 EvaluatorType& heuristic,
283 double max_time)
override;
285 std::unique_ptr<PolicyType> compute_policy(
287 EvaluatorType& heuristic,
290 double max_time)
override;
292 void print_statistics(std::ostream& out)
const override;
298 bool was_visited(
StateID state_id)
const;