mdds
segment_tree.hpp
1/*************************************************************************
2 *
3 * Copyright (c) 2010-2015 Kohei Yoshida
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 *
26 ************************************************************************/
27
28#ifndef INCLUDED_MDDS_SEGMENTTREE_HPP
29#define INCLUDED_MDDS_SEGMENTTREE_HPP
30
31#include "mdds/node.hpp"
32#include "mdds/global.hpp"
33
34#include <vector>
35#include <iostream>
36#include <map>
37#include <unordered_map>
38#include <memory>
39
40#ifdef MDDS_UNIT_TEST
41#include <sstream>
42#endif
43
44namespace mdds {
45
46template<typename _Key, typename _Value>
48{
49public:
50 typedef _Key key_type;
51 typedef _Value value_type;
52 typedef size_t size_type;
53 typedef std::vector<value_type> search_results_type;
54
55#ifdef MDDS_UNIT_TEST
56 struct segment_data
57 {
58 key_type begin_key;
59 key_type end_key;
60 value_type pdata;
61
62 segment_data(key_type _beg, key_type _end, value_type p) : begin_key(_beg), end_key(_end), pdata(p)
63 {}
64
65 bool operator==(const segment_data& r) const
66 {
67 return begin_key == r.begin_key && end_key == r.end_key && pdata == r.pdata;
68 }
69
70 bool operator!=(const segment_data& r) const
71 {
72 return !operator==(r);
73 }
74 };
75
76 struct segment_map_printer
77 {
78 void operator()(const ::std::pair<value_type, ::std::pair<key_type, key_type>>& r) const
79 {
80 using namespace std;
81 cout << r.second.first << "-" << r.second.second << ": " << r.first->name << endl;
82 }
83 };
84#endif
85
86public:
87 typedef ::std::vector<value_type> data_chain_type;
88 typedef std::unordered_map<value_type, ::std::pair<key_type, key_type>> segment_map_type;
89 typedef ::std::map<value_type, ::std::pair<key_type, key_type>> sorted_segment_map_type;
90
92 {
93 key_type low;
94 key_type high;
95 data_chain_type* data_chain;
96
97 bool operator==(const nonleaf_value_type& r) const
98 {
99 return low == r.low && high == r.high && data_chain == r.data_chain;
100 }
101 };
102
104 {
105 key_type key;
106 data_chain_type* data_chain;
107
108 bool operator==(const leaf_value_type& r) const
109 {
110 return key == r.key && data_chain == r.data_chain;
111 }
112 };
113
115 struct init_handler;
116 struct dispose_handler;
117#ifdef MDDS_UNIT_TEST
118 struct to_string_handler;
119#endif
120
122 typedef typename node::node_ptr node_ptr;
123
125
127 {
128 void operator()(
129 __st::nonleaf_node<segment_tree>& _self, const __st::node_base* left_node,
130 const __st::node_base* right_node)
131 {
132 // Parent node should carry the range of all of its child nodes.
133 if (left_node)
134 {
135 _self.value_nonleaf.low = left_node->is_leaf
136 ? static_cast<const node*>(left_node)->value_leaf.key
137 : static_cast<const nonleaf_node*>(left_node)->value_nonleaf.low;
138 }
139 else
140 {
141 // Having a left node is prerequisite.
142 throw general_error("segment_tree::fill_nonleaf_value_handler: Having a left node is prerequisite.");
143 }
144
145 if (right_node)
146 {
147 if (right_node->is_leaf)
148 {
149 // When the child nodes are leaf nodes, the upper bound
150 // must be the value of the node that comes after the
151 // right leaf node (if such node exists).
152
153 const node* p = static_cast<const node*>(right_node);
154 if (p->next)
155 _self.value_nonleaf.high = p->next->value_leaf.key;
156 else
157 _self.value_nonleaf.high = p->value_leaf.key;
158 }
159 else
160 {
161 _self.value_nonleaf.high = static_cast<const nonleaf_node*>(right_node)->value_nonleaf.high;
162 }
163 }
164 else
165 {
166 _self.value_nonleaf.high = left_node->is_leaf
167 ? static_cast<const node*>(left_node)->value_leaf.key
168 : static_cast<const nonleaf_node*>(left_node)->value_nonleaf.high;
169 }
170 }
171 };
172
173#ifdef MDDS_UNIT_TEST
174 struct to_string_handler
175 {
176 std::string operator()(const node& _self) const
177 {
178 std::ostringstream os;
179 os << "[" << _self.value_leaf.key << "] ";
180 return os.str();
181 }
182
183 std::string operator()(const __st::nonleaf_node<segment_tree>& _self) const
184 {
185 std::ostringstream os;
186 os << "[" << _self.value_nonleaf.low << "-" << _self.value_nonleaf.high << ")";
187 if (_self.value_nonleaf.data_chain)
188 {
189 os << " { ";
190 typename data_chain_type::const_iterator itr, itr_beg = _self.value_nonleaf.data_chain->begin(),
191 itr_end = _self.value_nonleaf.data_chain->end();
192 for (itr = itr_beg; itr != itr_end; ++itr)
193 {
194 if (itr != itr_beg)
195 os << ", ";
196 os << (*itr)->name;
197 }
198 os << " }";
199 }
200 os << " ";
201 return os.str();
202 }
203 };
204#endif
205
207 {
208 void operator()(node& _self)
209 {
210 _self.value_leaf.data_chain = nullptr;
211 }
212
213 void operator()(__st::nonleaf_node<segment_tree>& _self)
214 {
215 _self.value_nonleaf.data_chain = nullptr;
216 }
217 };
218
220 {
221 void operator()(node& _self)
222 {
223 delete _self.value_leaf.data_chain;
224 }
225
226 void operator()(__st::nonleaf_node<segment_tree>& _self)
227 {
228 delete _self.value_nonleaf.data_chain;
229 }
230 };
231
232#ifdef MDDS_UNIT_TEST
233 struct node_printer
234 {
235 void operator()(const __st::node_base* p) const
236 {
237 if (p->is_leaf)
238 std::cout << static_cast<const node*>(p)->to_string() << " ";
239 else
240 std::cout << static_cast<const nonleaf_node*>(p)->to_string() << " ";
241 }
242 };
243#endif
244
245private:
250 class search_results_base
251 {
252 public:
253 typedef std::vector<data_chain_type*> res_chains_type;
254 typedef std::shared_ptr<res_chains_type> res_chains_ptr;
255
256 public:
257 search_results_base() : mp_res_chains(static_cast<res_chains_type*>(nullptr))
258 {}
259
260 search_results_base(const search_results_base& r) : mp_res_chains(r.mp_res_chains)
261 {}
262
263 size_t size() const
264 {
265 size_t combined = 0;
266 if (!mp_res_chains)
267 return combined;
268
269 typename res_chains_type::const_iterator itr = mp_res_chains->begin(), itr_end = mp_res_chains->end();
270 for (; itr != itr_end; ++itr)
271 combined += (*itr)->size();
272 return combined;
273 }
274
275 void push_back_chain(data_chain_type* chain)
276 {
277 if (!chain || chain->empty())
278 return;
279
280 if (!mp_res_chains)
281 mp_res_chains.reset(new res_chains_type);
282 mp_res_chains->push_back(chain);
283 }
284
285 res_chains_ptr& get_res_chains()
286 {
287 return mp_res_chains;
288 }
289
290 private:
291 res_chains_ptr mp_res_chains;
292 };
293
294 class iterator_base
295 {
296 protected:
297 typedef typename search_results_base::res_chains_type res_chains_type;
298 typedef typename search_results_base::res_chains_ptr res_chains_ptr;
299
300 iterator_base(const res_chains_ptr& p) : mp_res_chains(p), m_end_pos(true)
301 {}
302
303 public:
304 typedef ::std::bidirectional_iterator_tag iterator_category;
305 typedef typename data_chain_type::value_type value_type;
306 typedef typename data_chain_type::pointer pointer;
307 typedef typename data_chain_type::reference reference;
308 typedef typename data_chain_type::difference_type difference_type;
309
310 iterator_base() : mp_res_chains(static_cast<res_chains_type*>(nullptr)), m_end_pos(true)
311 {}
312
313 iterator_base(const iterator_base& r)
314 : mp_res_chains(r.mp_res_chains), m_cur_chain(r.m_cur_chain), m_cur_pos_in_chain(r.m_cur_pos_in_chain),
315 m_end_pos(r.m_end_pos)
316 {}
317
318 iterator_base& operator=(const iterator_base& r)
319 {
320 mp_res_chains = r.mp_res_chains;
321 m_cur_chain = r.m_cur_chain;
322 m_cur_pos_in_chain = r.m_cur_pos_in_chain;
323 m_end_pos = r.m_end_pos;
324 return *this;
325 }
326
327 typename data_chain_type::value_type* operator++()
328 {
329 // We don't check for end position flag for performance reasons.
330 // The caller is responsible for making sure not to increment past
331 // end position.
332
333 // When reaching the end position, the internal iterators still
334 // need to be pointing at the last item before the end position.
335 // This is why we need to make copies of the iterators, and copy
336 // them back once done.
337
338 typename data_chain_type::iterator cur_pos_in_chain = m_cur_pos_in_chain;
339
340 if (++cur_pos_in_chain == (*m_cur_chain)->end())
341 {
342 // End of current chain. Inspect the next chain if exists.
343 typename res_chains_type::iterator cur_chain = m_cur_chain;
344 ++cur_chain;
345 if (cur_chain == mp_res_chains->end())
346 {
347 m_end_pos = true;
348 return nullptr;
349 }
350 m_cur_chain = cur_chain;
351 m_cur_pos_in_chain = (*m_cur_chain)->begin();
352 }
353 else
354 ++m_cur_pos_in_chain;
355
356 return operator->();
357 }
358
359 typename data_chain_type::value_type* operator--()
360 {
361 if (!mp_res_chains)
362 return nullptr;
363
364 if (m_end_pos)
365 {
366 m_end_pos = false;
367 return &(*m_cur_pos_in_chain);
368 }
369
370 if (m_cur_pos_in_chain == (*m_cur_chain)->begin())
371 {
372 if (m_cur_chain == mp_res_chains->begin())
373 {
374 // Already at the first data chain. Don't move the iterator position.
375 return nullptr;
376 }
377 --m_cur_chain;
378 m_cur_pos_in_chain = (*m_cur_chain)->end();
379 }
380 --m_cur_pos_in_chain;
381 return operator->();
382 }
383
384 bool operator==(const iterator_base& r) const
385 {
386 if (mp_res_chains.get())
387 {
388 // non-empty result set.
389 return mp_res_chains.get() == r.mp_res_chains.get() && m_cur_chain == r.m_cur_chain &&
390 m_cur_pos_in_chain == r.m_cur_pos_in_chain && m_end_pos == r.m_end_pos;
391 }
392
393 // empty result set.
394 if (r.mp_res_chains.get())
395 return false;
396 return m_end_pos == r.m_end_pos;
397 }
398
399 bool operator!=(const iterator_base& r) const
400 {
401 return !operator==(r);
402 }
403
404 typename data_chain_type::value_type& operator*()
405 {
406 return *m_cur_pos_in_chain;
407 }
408
409 typename data_chain_type::value_type* operator->()
410 {
411 return &(*m_cur_pos_in_chain);
412 }
413
414 protected:
415 void move_to_front()
416 {
417 if (!mp_res_chains)
418 {
419 // Empty data set.
420 m_end_pos = true;
421 return;
422 }
423
424 // We assume that there is at least one chain list, and no
425 // empty chain list exists. So, skip the check.
426 m_cur_chain = mp_res_chains->begin();
427 m_cur_pos_in_chain = (*m_cur_chain)->begin();
428 m_end_pos = false;
429 }
430
431 void move_to_end()
432 {
433 m_end_pos = true;
434 if (!mp_res_chains)
435 // Empty data set.
436 return;
437
438 m_cur_chain = mp_res_chains->end();
439 --m_cur_chain;
440 m_cur_pos_in_chain = (*m_cur_chain)->end();
441 --m_cur_pos_in_chain;
442 }
443
444 private:
445 res_chains_ptr mp_res_chains;
446 typename res_chains_type::iterator m_cur_chain;
447 typename data_chain_type::iterator m_cur_pos_in_chain;
448 bool m_end_pos : 1;
449 };
450
451public:
452 class search_results : public search_results_base
453 {
454 typedef typename search_results_base::res_chains_type res_chains_type;
455 typedef typename search_results_base::res_chains_ptr res_chains_ptr;
456
457 public:
458 class iterator : public iterator_base
459 {
460 friend class segment_tree<_Key, _Value>::search_results;
461
462 private:
463 iterator(const res_chains_ptr& p) : iterator_base(p)
464 {}
465
466 public:
467 iterator() : iterator_base()
468 {}
469 };
470
471 typename search_results::iterator begin()
472 {
473 typename search_results::iterator itr(search_results_base::get_res_chains());
474 itr.move_to_front();
475 return itr;
476 }
477
478 typename search_results::iterator end()
479 {
480 typename search_results::iterator itr(search_results_base::get_res_chains());
481 itr.move_to_end();
482 return itr;
483 }
484 };
485
487 {
488 public:
489 search_result_vector_inserter(search_results_type& result) : m_result(result)
490 {}
491 void operator()(data_chain_type* node_data)
492 {
493 if (!node_data)
494 return;
495
496 typename data_chain_type::const_iterator itr = node_data->begin(), itr_end = node_data->end();
497 for (; itr != itr_end; ++itr)
498 m_result.push_back(*itr);
499 }
500
501 private:
502 search_results_type& m_result;
503 };
504
506 {
507 public:
508 search_result_inserter(search_results_base& result) : m_result(result)
509 {}
510 void operator()(data_chain_type* node_data)
511 {
512 if (!node_data)
513 return;
514
515 m_result.push_back_chain(node_data);
516 }
517
518 private:
519 search_results_base& m_result;
520 };
521
522 segment_tree();
523 segment_tree(const segment_tree& r);
525
530 bool operator==(const segment_tree& r) const;
531
532 bool operator!=(const segment_tree& r) const
533 {
534 return !operator==(r);
535 }
536
543 bool is_tree_valid() const
544 {
545 return m_valid_tree;
546 }
547
552
562 bool insert(key_type begin_key, key_type end_key, value_type pdata);
563
580 bool search(key_type point, search_results_type& result) const;
581
591 search_results search(key_type point) const;
592
600 void remove(value_type value);
601
605 void clear();
606
610 size_t size() const;
611
615 bool empty() const;
616
622 size_t leaf_size() const;
623
624#ifdef MDDS_UNIT_TEST
625 void dump_tree() const;
626 void dump_leaf_nodes() const;
627 void dump_segment_data() const;
628 bool verify_node_lists() const;
629
630 struct leaf_node_check
631 {
632 key_type key;
633 data_chain_type data_chain;
634 };
635
636 bool verify_leaf_nodes(const ::std::vector<leaf_node_check>& checks) const;
637
644 bool verify_segment_data(const segment_map_type& checks) const;
645#endif
646
647private:
651 void search(key_type point, search_results_base& result) const;
652
653 typedef std::vector<__st::node_base*> node_list_type;
654 typedef std::map<value_type, std::unique_ptr<node_list_type>> data_node_map_type;
655
656 static void create_leaf_node_instances(const ::std::vector<key_type>& keys, node_ptr& left, node_ptr& right);
657
663 void descend_tree_and_mark(
664 __st::node_base* pnode, value_type pdata, key_type begin_key, key_type end_key, node_list_type* plist);
665
666 void build_leaf_nodes();
667
672 void remove_data_from_nodes(node_list_type* plist, const value_type pdata);
673 void remove_data_from_chain(data_chain_type& chain, const value_type pdata);
674
675 void clear_all_nodes();
676
677#ifdef MDDS_UNIT_TEST
678 static bool has_data_pointer(const node_list_type& node_list, const value_type pdata);
679 static void print_leaf_value(const leaf_value_type& v);
680#endif
681
682private:
683 std::vector<nonleaf_node> m_nonleaf_node_pool;
684
685 segment_map_type m_segment_data;
686
692 data_node_map_type m_tagged_node_map;
693
694 nonleaf_node* m_root_node;
695 node_ptr m_left_leaf;
696 node_ptr m_right_leaf;
697 bool m_valid_tree : 1;
698};
699
700} // namespace mdds
701
702#include "segment_tree_def.inl"
703
704#endif
Definition: global.hpp:84
Definition: segment_tree.hpp:506
Definition: segment_tree.hpp:459
Definition: segment_tree.hpp:453
Definition: segment_tree.hpp:48
bool insert(key_type begin_key, key_type end_key, value_type pdata)
bool operator==(const segment_tree &r) const
bool empty() const
bool search(key_type point, search_results_type &result) const
void remove(value_type value)
size_t leaf_size() const
bool is_tree_valid() const
Definition: segment_tree.hpp:543
size_t size() const
search_results search(key_type point) const
Definition: node.hpp:44
bool is_leaf
parent nonleaf_node
Definition: node.hpp:46
Definition: node.hpp:140
node_ptr next
previous sibling leaf node.
Definition: node.hpp:162
Definition: node.hpp:56
Definition: segment_tree.hpp:220
Definition: segment_tree.hpp:127
Definition: segment_tree.hpp:207
Definition: segment_tree.hpp:104
Definition: segment_tree.hpp:92
key_type high
low range value (inclusive)
Definition: segment_tree.hpp:94
data_chain_type * data_chain
high range value (non-inclusive)
Definition: segment_tree.hpp:95