1#ifndef ALGORITHMS_INT_HASH_SET_H
2#define ALGORITHMS_INT_HASH_SET_H
4#include "downward/utils/collections.h"
5#include "downward/utils/logging.h"
6#include "downward/utils/system.h"
15namespace int_hash_set {
72using HashType =
unsigned int;
74static_assert(
sizeof(KeyType) == 4,
"KeyType does not use 4 bytes");
75static_assert(
sizeof(HashType) == 4,
"HashType does not use 4 bytes");
77template <
typename Hasher,
typename Equal>
80 static const int MAX_DISTANCE = 32;
81 static const unsigned int MAX_BUCKETS =
82 std::numeric_limits<unsigned int>::max();
88 static const KeyType empty_bucket_key = -1;
91 : key(empty_bucket_key)
96 Bucket(KeyType key, HashType hash)
102 bool full()
const {
return key != empty_bucket_key; }
107 std::vector<Bucket> buckets;
111 int capacity()
const {
return buckets.size(); }
113 void rehash(
int new_capacity)
115 assert(new_capacity >= 1);
116 int num_entries_before = num_entries;
117 std::vector<Bucket> old_buckets = std::move(buckets);
118 assert(buckets.empty());
120 buckets.resize(new_capacity);
121 for (
const Bucket& bucket : old_buckets) {
123 insert(bucket.key, bucket.hash);
126 (void)num_entries_before;
127 assert(num_entries == num_entries_before);
133 unsigned int num_buckets = buckets.size();
135 assert((num_buckets & (num_buckets - 1)) == 0);
136 if (num_buckets > MAX_BUCKETS / 2) {
137 std::cerr <<
"IntHashSet surpassed maximum capacity. This means"
138 " you either use IntHashSet for high-memory"
139 " applications for which it was not designed, or there"
140 " is an unexpectedly high number of hash collisions"
141 " that should be investigated. Aborting."
143 utils::exit_with(utils::ExitCode::SEARCH_CRITICAL_ERROR);
145 rehash(num_buckets * 2);
148 int get_bucket(HashType hash)
const
150 assert(!buckets.empty());
151 unsigned int num_buckets = buckets.size();
153 assert((num_buckets & (num_buckets - 1)) == 0);
156 return hash & (num_buckets - 1);
163 int get_distance(
int index1,
int index2)
const
165 assert(utils::in_bounds(index1, buckets));
166 assert(utils::in_bounds(index2, buckets));
167 if (index2 >= index1) {
168 return index2 - index1;
170 return capacity() + index2 - index1;
174 int find_next_free_bucket_index(
int index)
const
176 assert(num_entries < capacity());
177 assert(utils::in_bounds(index, buckets));
178 while (buckets[index].full()) {
179 index = get_bucket(index + 1);
184 KeyType find_equal_key(KeyType key, HashType hash)
const
186 assert(hasher(key) == hash);
187 int ideal_index = get_bucket(hash);
188 for (
int i = 0; i < MAX_DISTANCE; ++i) {
189 int index = get_bucket(ideal_index + i);
190 const Bucket& bucket = buckets[index];
191 if (bucket.full() && bucket.hash == hash &&
192 equal(bucket.key, key)) {
196 return Bucket::empty_bucket_key;
213 std::pair<KeyType, bool> insert(KeyType key, HashType hash)
215 assert(hasher(key) == hash);
219 KeyType equal_key = find_equal_key(key, hash);
220 if (equal_key != Bucket::empty_bucket_key) {
221 return std::make_pair(equal_key,
false);
224 assert(num_entries <= capacity());
225 if (num_entries == capacity()) {
228 assert(num_entries < capacity());
231 int ideal_index = get_bucket(hash);
234 int free_index = find_next_free_bucket_index(ideal_index);
243 while (get_distance(ideal_index, free_index) >= MAX_DISTANCE) {
244 bool swapped =
false;
245 int num_buckets = capacity();
246 int max_offset = std::min(MAX_DISTANCE, num_buckets) - 1;
247 for (
int offset = max_offset; offset >= 1; --offset) {
248 assert(offset < num_buckets);
249 int candidate_index = free_index + num_buckets - offset;
250 assert(candidate_index >= 0);
251 candidate_index = get_bucket(candidate_index);
252 HashType candidate_hash = buckets[candidate_index].hash;
253 int candidate_ideal_index = get_bucket(candidate_hash);
254 if (get_distance(candidate_ideal_index, free_index) <
257 std::swap(buckets[candidate_index], buckets[free_index]);
258 free_index = candidate_index;
267 return insert(key, hash);
270 assert(utils::in_bounds(free_index, buckets));
271 assert(!buckets[free_index].full());
272 buckets[free_index] = Bucket(key, hash);
274 return std::make_pair(key,
true);
278 IntHashSet(
const Hasher& hasher,
const Equal& equal)
287 int size()
const {
return num_entries; }
296 std::pair<KeyType, bool> insert(KeyType key)
299 return insert(key, hasher(key));
302 void dump(utils::LogProxy& log)
const
304 int num_buckets = capacity();
306 for (
int i = 0; i < num_buckets; ++i) {
307 const Bucket& bucket = buckets[i];
313 if (i < num_buckets - 1) {
317 log <<
"]" << std::endl;
320 void print_statistics(utils::LogProxy& log)
const
322 assert(!buckets.empty());
323 int num_buckets = capacity();
324 assert(num_buckets != 0);
325 log <<
"Int hash set load factor: " << num_entries <<
"/" << num_buckets
326 <<
" = " <<
static_cast<double>(num_entries) / num_buckets
328 log <<
"Int hash set resizes: " << num_resizes << std::endl;
332template <
typename Hasher,
typename Equal>
333const int IntHashSet<Hasher, Equal>::MAX_DISTANCE;
335template <
typename Hasher,
typename Equal>
336const unsigned int IntHashSet<Hasher, Equal>::MAX_BUCKETS;