24enum class TrialTerminationCondition {
// Unsigned counters, each initialised to zero.
// NOTE(review): what event each counter tracks is suggested only by its name;
// the code that increments them is not visible in this view — confirm.
34 unsigned long long trials = 0;
35 unsigned long long trial_bellman_backups = 0;
36 unsigned long long check_and_solve_bellman_backups = 0;
37 unsigned long long traps = 0;
38 unsigned long long trial_length = 0;
// Timer member constructed with the argument `true`.
// NOTE(review): the meaning of that argument is defined by utils::Timer,
// which is not visible here — confirm before relying on it.
39 utils::Timer trap_timer = utils::Timer(
true);
// Const member function that receives the output stream to write to.
// Only declared here; the definition is not part of this view.
41 void print(std::ostream& out)
const;
// Per-state record that extends heuristic_search::PerStateBaseInformation
// with two extra flag bits stored in the inherited `info` field.
//
// Template parameters:
//   Action      - forwarded to the base class.
//   UseInterval - forwarded to the base class.
// NOTE(review): the base is instantiated with a fixed `true` as its second
// argument; its meaning is defined by PerStateBaseInformation — confirm.
45template <
typename Action,
bool UseInterval>
46struct PerStateInformation
47 :
public heuristic_search::
48 PerStateBaseInformation<Action, true, UseInterval> {
50 using Base = heuristic_search::PerStateBaseInformation<Action, true, UseInterval>;
// Flag layout, placed directly above the Base::BITS bits used by the base:
//   MARKED_TRIAL - bit Base::BITS
//   SOLVED       - bit Base::BITS + 1
//   MASK         - both of the above bits
//   BITS         - Base::BITS plus the two bits added here
53 static constexpr uint8_t MARKED_TRIAL = 1 << Base::BITS;
54 static constexpr uint8_t SOLVED = 2 << Base::BITS;
55 static constexpr uint8_t BITS = Base::BITS + 2;
56 static constexpr uint8_t MASK = 3 << Base::BITS;
// True if the two flag bits equal exactly SOLVED (SOLVED set and
// MARKED_TRIAL clear), or if is_goal_or_terminal() returns true.
58 bool is_solved()
const
60 return (this->info & MASK) == SOLVED || this->is_goal_or_terminal();
// True if the MARKED_TRIAL bit is set (non-zero mask result converts to bool).
63 bool is_on_trial()
const {
return (this->info & MARKED_TRIAL); }
// Clears both flag bits, then sets SOLVED; this also drops MARKED_TRIAL.
65 void set_solved() { this->info = (this->info & ~MASK) | SOLVED; }
// Sets the MARKED_TRIAL bit and leaves every other bit unchanged.
66 void set_on_trial() { this->info = this->info | MARKED_TRIAL; }
// Clears only the MARKED_TRIAL bit and leaves every other bit unchanged.
68 void clear_trial_flag() { this->info = (this->info & ~MARKED_TRIAL); }
// Template header with three unnamed parameters (type, type, bool).
// NOTE(review): presumably a forward declaration; the declared name is on a
// line that is not visible in this view.
73template <
typename,
typename,
bool>
// Template header for the implementation class.
//   State, Action - types from which the quotient types below are built.
//   UseInterval   - boolean switch forwarded to the per-state information.
76template <
typename State,
typename Action,
bool UseInterval>
// Template arguments of a base or member type: quotient state, quotient
// action, and internal::PerStateInformation over the quotient action.
// NOTE(review): the lines naming the class and the template being
// instantiated are not visible here; the `Base` alias below suggests it is
// HeuristicSearchBase — confirm.
79 quotients::QuotientState<State, Action>,
80 quotients::QuotientAction<Action>,
81 internal::PerStateInformation<
82 quotients::QuotientAction<Action>,
// Shorthand for the HeuristicSearchBase type reachable through TALRTDPImpl.
84 using Base =
typename TALRTDPImpl::HeuristicSearchBase;
// Dependent type used without `typename`, unlike the aliases below; this is
// accepted in alias-declarations since C++20.
86 using AlgorithmValueType = Base::AlgorithmValueType;
// Quotient types instantiated for this State/Action pair.
88 using QuotientSystem = quotients::QuotientSystem<State, Action>;
89 using QState = quotients::QuotientState<State, Action>;
90 using QAction = quotients::QuotientAction<Action>;
// Types re-exported from Base under quotient-specific names.
92 using QEvaluator =
typename Base::EvaluatorType;
93 using QuotientPolicyPicker =
typename Base::PolicyPickerType;
94 using StateInfo =
typename Base::StateInfo;
// Second template header with three unnamed parameters (type, type, bool).
// NOTE(review): presumably introduces a friend declaration; the declaration
// itself is on a line that is not visible in this view.
98 template <
typename,
typename,
bool>
// Record constructed explicitly from a StateID; holds a list of state ids
// and two one-bit flags.
// NOTE(review): the constructor body and the declaration of `rv`, which both
// update() overloads read and write, are on lines not visible in this view.
101 struct ExplorationInformation {
102 explicit ExplorationInformation(
StateID state_id)
// State ids stored for this record.
108 std::vector<StateID> successors;
// One-bit flags, both defaulting to true. Default member initialisers on
// bit-fields require C++20.
109 bool is_root : 1 =
true;
110 bool is_trap : 1 =
true;
// Declared only; returns a bool.
// NOTE(review): presumably advances through `successors` — confirm against
// the definition, which is not visible here.
113 bool next_successor();
// Folds another record into this one: `is_trap` and `rv` each remain true
// only if they are also true in `backtracked`.
117 void update(
const ExplorationInformation& backtracked)
119 is_trap = is_trap && backtracked.is_trap;
120 rv = rv && backtracked.rv;
// Folds a state's information into this record: `rv` remains true only if
// succ_info.is_solved() is true.
// NOTE(review): other statements of this overload may be on lines not
// visible here.
123 void update(
const StateInfo& succ_info)
126 rv = rv && succ_info.is_solved();
// Quotient actions stored for a stack entry.
138 std::vector<QAction> aops;
// Explicit construction from a StateID; the initialiser list and body are
// not visible in this view.
140 explicit StackInfo(
StateID state_id)
// Indexed accessor on a mutable StackInfo: index 0 yields `state_id`,
// index 1 yields `aops`, both by reference (return type is `auto&`).
// NOTE(review): `i` is presumably a non-type template parameter declared on
// a line not visible here; this looks like tuple-like access — confirm.
146 friend auto& get(StackInfo& info)
148 if constexpr (i == 0)
return info.state_id;
149 if constexpr (i == 1)
return info.aops;
// Const counterpart of the accessor above; returns const references.
153 friend const auto& get(
const StackInfo& info)
155 if constexpr (i == 0)
return info.state_id;
156 if constexpr (i == 1)
return info.aops;
// Negative sentinel values.
// NOTE(review): presumably stored in `stack_index_` (a map to int) to mark
// states that have no stack position — confirm against the implementation.
160 static constexpr int STATE_UNSEEN = -1;
161 static constexpr int STATE_CLOSED = -2;
// Configuration fixed at construction (all three members are const).
164 const TrialTerminationCondition stop_at_consistent_;
165 const bool reexpand_traps_;
166 const std::shared_ptr<QuotientSuccessorSampler> sample_;
// Working containers: exploration records, stack entries, and an int stored
// per state.
169 std::deque<ExplorationInformation> queue_;
170 std::deque<StackInfo> stack_;
171 storage::StateHashMap<int> stack_index_;
// State ids of the current trial.
173 std::deque<StateID> current_trial_;
// Statistics object of type internal::Statistics.
175 internal::Statistics statistics_;
// Buffers of transitions over quotient actions and of algorithm values.
// NOTE(review): presumably reused scratch storage for backups — confirm.
178 std::vector<Transition<QAction>> transitions_;
179 std::vector<AlgorithmValueType> qvalues_;
// Parameters of a declaration whose name line is not visible: a shared
// policy picker, a trial termination condition and a shared successor
// sampler.
// NOTE(review): presumably the constructor, given the matching const
// members `stop_at_consistent_` and `sample_` — confirm.
186 std::shared_ptr<QuotientPolicyPicker> policy_chooser,
187 TrialTerminationCondition stop_consistent,
189 std::shared_ptr<QuotientSuccessorSampler> succ_sampler);
// Leading parameters (quotient system and heuristic, both by non-const
// reference) of a declaration whose name and remaining parameters are not
// visible here.
192 QuotientSystem& quotient,
193 QEvaluator& heuristic,
// Const member function that receives the output stream to write to.
198 void print_statistics(std::ostream& out)
const;
// Parameters of a declaration taking the quotient system, the heuristic and
// a countdown timer by reference; its name line is not visible here.
202 QuotientSystem& quotient,
203 QEvaluator& heuristic,
205 utils::CountdownTimer& timer);
// Returns a bool; takes the quotient system, the heuristic and a countdown
// timer by non-const reference.
// NOTE(review): the meaning of the returned bool is not visible in this
// view — confirm against the definition.
207 bool check_and_solve(
208 QuotientSystem& quotient,
209 QEvaluator& heuristic,
210 utils::CountdownTimer& timer);
// Parameters of a declaration that operates on one ExplorationInformation
// record; its name line is not visible here.
213 QuotientSystem& quotient,
214 ExplorationInformation& einfo,
215 utils::CountdownTimer& timer);
// Parameters of a declaration that receives both a StateInfo and an
// ExplorationInformation by non-const reference; its name line is not
// visible here.
220 QuotientSystem& quotient,
221 QEvaluator& heuristic,
223 StateInfo& state_info,
224 ExplorationInformation& e_info);
// Template header for the public algorithm class; same parameter list as the
// implementation class (State, Action, UseInterval).
// NOTE(review): the class-head line itself is not visible in this view.
227template <
typename State,
typename Action,
bool UseInterval>
// Shorthand for the MDPAlgorithm type reachable through TALRTDP.
229 using Base =
typename TALRTDP::MDPAlgorithm;
// Quotient types instantiated for this State/Action pair.
231 using QuotientSystem = quotients::QuotientSystem<State, Action>;
232 using QState = quotients::QuotientState<State, Action>;
233 using QAction = quotients::QuotientAction<Action>;
// Types re-exported from Base.
235 using MDPType =
typename Base::MDPType;
236 using EvaluatorType =
typename Base::EvaluatorType;
237 using PolicyType =
typename Base::PolicyType;
// Implementation object held by value, instantiated with the same template
// arguments as this class.
242 TALRTDPImpl<State, Action, UseInterval> algorithm_;
// Parameters of a declaration whose name line is not visible: a shared
// policy picker, a trial termination condition and a shared successor
// sampler.
// NOTE(review): presumably the constructor, forwarding to `algorithm_` —
// confirm.
249 std::shared_ptr<QuotientPolicyPicker> policy_chooser,
250 TrialTerminationCondition stop_consistent,
252 std::shared_ptr<QuotientSuccessorSampler> succ_sampler);
// Tail of a `final` member declaration that takes a heuristic and a
// `max_time` value in double; its name and other parameters are not visible.
256 EvaluatorType& heuristic,
259 double max_time)
final;
// Returns an owning pointer to a PolicyType; declared `final`.
// Takes a heuristic and a `max_time` value in double; its remaining
// parameters are on lines not visible here.
261 std::unique_ptr<PolicyType> compute_policy(
263 EvaluatorType& heuristic,
266 double max_time)
final;
// Const `final` member function that receives the output stream to write to.
268 void print_statistics(std::ostream& out)
const final;