64 using Base =
typename TATopologicalValueIteration::MDPAlgorithm;
66 using MDPType =
typename Base::MDPType;
67 using EvaluatorType =
typename Base::EvaluatorType;
68 using PolicyType =
typename Base::PolicyType;
74 enum { NEW, CLOSED, ONSTACK };
76 static constexpr uint32_t UNDEF =
77 std::numeric_limits<uint32_t>::max() >> 1;
78 static constexpr uint32_t UNDEF_ECD =
79 std::numeric_limits<uint32_t>::max();
81 unsigned explored : 1 = 0;
82 unsigned stack_id : 31 = UNDEF;
83 unsigned ecd_stack_id = UNDEF_ECD;
86 auto get_status()
const;
89 auto get_ecd_status()
const;
98 mutable AlgorithmValueType conv_part;
102 std::vector<ItemProbabilityPair<StateID>> scc_successors;
104 template <
typename ValueStore>
105 AlgorithmValueType compute_q_value(ValueStore& value_store)
const;
108 struct ExplorationInfo {
111 StackInfo& stack_info;
118 std::vector<Action> aops;
122 typename Distribution<StateID>::const_iterator successor;
137 bool has_all_zero : 1 =
true;
141 StackInfo& stack_info,
142 unsigned int stackidx);
144 bool next_transition(MDPType& mdp);
145 bool forward_non_loop_transition(MDPType& mdp,
const State& state);
146 bool next_successor();
151 struct cmp_qval_info {
152 bool operator()(
const QValueInfo& left,
const QValueInfo& right)
const
154 return std::ranges::lexicographical_compare(
156 right.scc_successors,
157 [](
const auto& left,
const auto& right) {
158 return left.item < right.item ||
159 (left.item == right.item && is_approx_less(
170 AlgorithmValueType* value;
175 AlgorithmValueType conv_part;
179 std::set<QValueInfo, cmp_qval_info> non_ec_transitions;
184 std::vector<QValueInfo> ec_transitions;
186 struct ParentTransition {
188 unsigned parent_transition_idx;
191 struct TransitionFlags {
192 bool is_active_exiting : 1;
197 unsigned active_exit_transitions =
199 unsigned active_transitions = 0;
201 std::vector<TransitionFlags> transition_flags;
202 std::vector<ParentTransition> parents;
204 StackInfo(
StateID state_id, AlgorithmValueType& value_ref);
206 void add_non_ec_transition(QValueInfo&& info);
209 struct ECDExplorationInfo {
211 StackInfo& stack_info;
215 typename std::vector<QValueInfo>::iterator action;
216 typename std::vector<QValueInfo>::iterator end;
219 typename std::vector<ItemProbabilityPair<StateID>>::iterator successor;
228 bool recurse : 1 =
false;
231 bool leaves_scc : 1 =
false;
232 bool remains_scc : 1 =
false;
234 ECDExplorationInfo(StackInfo& stack_info,
unsigned stackidx);
236 bool next_transition();
237 bool next_successor();
242 struct DecompositionQueue {
243 std::vector<StateID> state_ids;
244 std::vector<std::size_t> scc_spans;
246 void reserve(std::size_t num_states)
248 state_ids.reserve(num_states);
249 scc_spans.reserve(num_states);
252 void register_new_scc() { scc_spans.push_back(state_ids.size()); }
254 void add_scc_state(
StateID state_id) { state_ids.push_back(state_id); }
256 bool pop_scc(std::vector<StateID>& r)
258 using namespace std::views;
262 if (state_ids.empty())
return false;
264 auto scc_view = state_ids | drop(scc_spans.back());
266 for (
const auto state_id : scc_view) {
267 r.push_back(state_id);
270 state_ids.erase(scc_view.begin(), scc_view.end());
272 scc_spans.pop_back();
278 storage::PerStateStorage<StateInfo> state_information_;
279 std::vector<ExplorationInfo> exploration_stack_;
280 std::deque<StackInfo> stack_;
282 std::vector<ECDExplorationInfo> exploration_stack_ecd_;
283 std::vector<StateID> stack_ecd_;
285 DecompositionQueue decomposition_queue_;
286 std::vector<StateID> scc_;
295 exploration_stack_.reserve(num_states_hint);
296 exploration_stack_ecd_.reserve(num_states_hint);
297 stack_ecd_.reserve(num_states_hint);
298 decomposition_queue_.reserve(num_states_hint);
299 scc_.reserve(num_states_hint);
304 EvaluatorType& heuristic,
307 double max_time)
override;
309 std::unique_ptr<PolicyType> compute_policy(
311 EvaluatorType& heuristic,
314 double max_time)
override;
333 const EvaluatorType& heuristic,
336 double max_time = std::numeric_limits<double>::infinity());
344 StateInfo& state_info,
345 AlgorithmValueType& value);
352 bool initialize_state(
354 const EvaluatorType& heuristic,
355 ExplorationInfo& exp_info,
368 ExplorationInfo& explore,
370 utils::CountdownTimer& timer);
377 ExplorationInfo& exp_info,
378 unsigned int stack_idx,
379 utils::CountdownTimer& timer);
381 void find_and_decompose_sccs(utils::CountdownTimer& timer);
383 bool initialize_ecd(ECDExplorationInfo& exp_info);
386 push_successor_ecd(ECDExplorationInfo& e, utils::CountdownTimer& timer);
388 void scc_found_ecd(ECDExplorationInfo& e);