AI 24/25 Project Software
Documentation for the AI 24/25 course programming project software
Loading...
Searching...
No Matches
vdiff_tiebreaker_impl.h
1#include "probfd/policy_pickers/vdiff_tiebreaker.h"
2
3#include "probfd/algorithms/state_properties.h"
4
5#include "probfd/interval.h"
6
7namespace probfd::policy_pickers {
8
9template <typename State, typename Action>
10VDiffTiebreaker<State, Action>::VDiffTiebreaker(
11 bool stable_policy,
12 value_t favor_large_gaps)
13 : VDiffTiebreaker::StablePolicyPicker(stable_policy)
14 , favor_large_gaps_(favor_large_gaps)
15{
16}
17
18template <typename State, typename Action>
19int VDiffTiebreaker<State, Action>::pick_index(
20 MDP<State, Action>&,
21 std::optional<Action>,
22 const std::vector<Transition<Action>>& greedy_transitions,
23 algorithms::StateProperties& properties)
24{
25 auto it = std::ranges::min_element(
26 greedy_transitions,
27 [](value_t lhs, value_t rhs) { return is_approx_less(lhs, rhs); },
28 [&properties, factor = favor_large_gaps_](const Transition<Action>& t) {
29 return t.successor_dist.expectation([&](StateID id) {
30 return factor * properties.lookup_bounds(id).length();
31 });
32 });
33
34 return std::distance(greedy_transitions.begin(), it);
35}
36
37} // namespace probfd::policy_pickers
bool is_approx_less(value_t v1, value_t v2, value_t epsilon=g_epsilon)
Equivalent to $v_1 + \epsilon < v_2$.
double value_t
Typedef for the state value type.
Definition aliases.h:7