Open3D (C++ API)  0.17.0
SlabHashBackendImpl.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2023 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

// Copyright 2019 Saman Ashkiani
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing permissions
// and limitations under the License.

#pragma once

#include "open3d/core/CUDAUtils.h"
#include "open3d/core/hashmap/CUDA/CUDAHashBackendBufferAccessor.h"
#include "open3d/core/hashmap/CUDA/SlabMacros.h"
#include "open3d/core/hashmap/CUDA/SlabNodeManager.h"
#include "open3d/core/hashmap/CUDA/SlabTraits.h"

namespace open3d {
namespace core {

// Each slab contains a collection of uint32_t entries.
// Each uint32_t entry can represent:
// 0) an empty placeholder;
// 1) a stored buf_index;
// 2) a ptr to the next slab if at the end of the slab.
// In cases 0) and 1), it is interpreted as a buf_index_t.
// In case 2), it is interpreted as a uint32_t.
// The two interpretations are bit-identical; the implementation distinguishes
// them only to emphasize the difference in meaning.

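// Layout (as used by the constants below): within each 32-entry slab, the
// lanes selected by kNodePtrLanesMask (lanes 0..30) hold key entries, while
// the lane at kNextSlabPtrLaneId (lane 31) holds the pointer to the next slab
// in the bucket's linked list.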
template <typename Key, typename Hash, typename Eq>
class SlabHashBackendImpl {
public:
    SlabHashBackendImpl();

    __host__ void Setup(int64_t init_buckets,
                        const SlabNodeManagerImpl& node_mgr_impl,
                        const CUDAHashBackendBufferAccessor& buffer_accessor);

    /// Warp-insert a pre-allocated buf_index at key.
    __device__ bool Insert(bool lane_active,
                           uint32_t lane_id,
                           uint32_t bucket_id,
                           const Key& key,
                           buf_index_t buf_index);

    /// Warp-find a buf_index and its mask at key.
    __device__ Pair<buf_index_t, bool> Find(bool lane_active,
                                            uint32_t lane_id,
                                            uint32_t bucket_id,
                                            const Key& key);

    /// Warp-erase an entry at key.
    __device__ Pair<buf_index_t, bool> Erase(bool lane_active,
                                             uint32_t lane_id,
                                             uint32_t bucket_id,
                                             const Key& key);

    /// Warp-synchronize a key in a slab.
    __device__ void WarpSyncKey(const Key& key, uint32_t lane_id, Key& ret_key);

    /// Warp-find a key in a slab.
    __device__ int32_t WarpFindKey(const Key& src_key,
                                   uint32_t lane_id,
                                   uint32_t slab_entry);

    /// Warp-find the first empty slot in a slab.
    __device__ int32_t WarpFindEmpty(uint32_t slab_entry);

    // Hash function.
    __device__ int64_t ComputeBucket(const Key& key) const;

    // Node manager.
    __device__ uint32_t AllocateSlab(uint32_t lane_id);
    __device__ void FreeSlab(uint32_t slab_ptr);

    // Helpers.
    __device__ uint32_t* SlabEntryPtr(uint32_t bucket_id,
                                      uint32_t lane_id,
                                      uint32_t slab_ptr) {
        return (slab_ptr == kHeadSlabAddr)
                       ? SlabEntryPtrFromHead(bucket_id, lane_id)
                       : SlabEntryPtrFromNodes(slab_ptr, lane_id);
    }

    __device__ uint32_t* SlabEntryPtrFromNodes(uint32_t slab_ptr,
                                               uint32_t lane_id) {
        return node_mgr_impl_.get_unit_ptr_from_slab(slab_ptr, lane_id);
    }
    __device__ uint32_t* SlabEntryPtrFromHead(uint32_t bucket_id,
                                              uint32_t lane_id) {
        return reinterpret_cast<uint32_t*>(bucket_list_head_) +
               bucket_id * kWarpSize + lane_id;
    }

public:
    Hash hash_fn_;
    Eq eq_fn_;
    int64_t bucket_count_;

    Slab* bucket_list_head_;
    SlabNodeManagerImpl node_mgr_impl_;
    CUDAHashBackendBufferAccessor buffer_accessor_;

    // TODO: verify size with alignment
    int key_size_in_int_ = sizeof(Key) / sizeof(int);
};

/// Kernels.
template <typename Key, typename Hash, typename Eq>
__global__ void InsertKernelPass0(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* input_keys,
                                  buf_index_t* output_buf_indices,
                                  int heap_counter_prev,
                                  int64_t count);

template <typename Key, typename Hash, typename Eq>
__global__ void InsertKernelPass1(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* input_keys,
                                  buf_index_t* output_buf_indices,
                                  bool* output_masks,
                                  int64_t count);

template <typename Key, typename Hash, typename Eq, typename block_t>
__global__ void InsertKernelPass2(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* const* input_values_soa,
                                  buf_index_t* output_buf_indices,
                                  bool* output_masks,
                                  int64_t count,
                                  int64_t n_values);

template <typename Key, typename Hash, typename Eq>
__global__ void FindKernel(SlabHashBackendImpl<Key, Hash, Eq> impl,
                           const void* input_keys,
                           buf_index_t* output_buf_indices,
                           bool* output_masks,
                           int64_t count);

template <typename Key, typename Hash, typename Eq>
__global__ void EraseKernelPass0(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                 const void* input_keys,
                                 buf_index_t* output_buf_indices,
                                 bool* output_masks,
                                 int64_t count);

template <typename Key, typename Hash, typename Eq>
__global__ void EraseKernelPass1(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                 buf_index_t* output_buf_indices,
                                 bool* output_masks,
                                 int64_t count);

template <typename Key, typename Hash, typename Eq>
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                       buf_index_t* output_buf_indices,
                                       uint32_t* output_count);

template <typename Key, typename Hash, typename Eq>
__global__ void CountElemsPerBucketKernel(
        SlabHashBackendImpl<Key, Hash, Eq> impl, int64_t* bucket_elem_counts);

template <typename Key, typename Hash, typename Eq>
SlabHashBackendImpl<Key, Hash, Eq>::SlabHashBackendImpl()
    : bucket_count_(0), bucket_list_head_(nullptr) {}

template <typename Key, typename Hash, typename Eq>
__host__ void SlabHashBackendImpl<Key, Hash, Eq>::Setup(
        int64_t init_buckets,
        const SlabNodeManagerImpl& allocator_impl,
        const CUDAHashBackendBufferAccessor& buffer_accessor) {
    bucket_count_ = init_buckets;
    node_mgr_impl_ = allocator_impl;
    buffer_accessor_ = buffer_accessor;
}

template <typename Key, typename Hash, typename Eq>
__device__ bool SlabHashBackendImpl<Key, Hash, Eq>::Insert(
        bool lane_active,
        uint32_t lane_id,
        uint32_t bucket_id,
        const Key& key,
        buf_index_t buf_index) {
    uint32_t work_queue = 0;
    uint32_t prev_work_queue = 0;
    uint32_t slab_ptr = kHeadSlabAddr;
    Key src_key;

    bool mask = false;

    // > Loop when we have active lanes.
    while ((work_queue = __ballot_sync(kSyncLanesMask, lane_active))) {
        // 0. Restart from linked list head if last insertion is finished.
        slab_ptr = (prev_work_queue != work_queue) ? kHeadSlabAddr : slab_ptr;
        uint32_t src_lane = __ffs(work_queue) - 1;
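        // __ffs returns the 1-based position of the lowest set bit, so the
        // lowest-indexed lane that is still active is elected as this
        // round's source lane.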
        uint32_t src_bucket =
                __shfl_sync(kSyncLanesMask, bucket_id, src_lane, kWarpSize);
        WarpSyncKey(key, src_lane, src_key);

        uint32_t slab_entry = *SlabEntryPtr(src_bucket, lane_id, slab_ptr);

        int32_t lane_found = WarpFindKey(src_key, lane_id, slab_entry);
        int32_t lane_empty = WarpFindEmpty(slab_entry);

        // Branch 1: key already exists, ABORT.
        if (lane_found >= 0) {
            if (lane_id == src_lane) {
                lane_active = false;
            }
        }

        // Branch 2: empty slot available, try to insert.
        else if (lane_empty >= 0) {
            // The if statements cannot be merged: doing so would interrupt
            // the warp-synchronous flow.
            if (lane_id == src_lane) {
                // Now regard the entry as a value of buf_index.
                const uint32_t* empty_entry_ptr =
                        SlabEntryPtr(src_bucket, lane_empty, slab_ptr);

                uint32_t old_empty_entry_value =
                        atomicCAS((unsigned int*)empty_entry_ptr,
                                  kEmptyNodeAddr, buf_index);

                // Branch 2.1: SUCCEED.
                if (old_empty_entry_value == kEmptyNodeAddr) {
                    lane_active = false;
                    mask = true;
                }
                // Branch 2.2: failed, RESTART.
                // In the subsequent attempt:
                // > if the same key was inserted into this slot,
                //   we fall back to Branch 1;
                // > if a different key was inserted,
                //   we go to Branch 2 or 3.
            }
        }

        // Branch 3: nothing found in this slab, go to the next slab.
        else {
            // Broadcast the next slab pointer.
            uint32_t next_slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry,
                                                 kNextSlabPtrLaneId, kWarpSize);

            // Branch 3.1: next slab exists, RESTART at the updated slab ptr.
            if (next_slab_ptr != kEmptySlabAddr) {
                slab_ptr = next_slab_ptr;
            }

            // Branch 3.2: next slab is empty, try to allocate one from the
            // Slab buffer.
            else {
                // Warp allocate, must be outside the condition clause.
                uint32_t new_next_slab_ptr = AllocateSlab(lane_id);

                if (lane_id == kNextSlabPtrLaneId) {
                    const uint32_t* next_slab_entry_ptr = SlabEntryPtr(
                            src_bucket, kNextSlabPtrLaneId, slab_ptr);

                    uint32_t old_next_slab_entry_value =
                            atomicCAS((unsigned int*)next_slab_entry_ptr,
                                      kEmptySlabAddr, new_next_slab_ptr);

                    // Branch 3.2.1: another thread has allocated, RESTART.
                    // In the subsequent attempt, go to Branch 2.
                    if (old_next_slab_entry_value != kEmptySlabAddr) {
                        FreeSlab(new_next_slab_ptr);
                    }

                    // Branch 3.2.2: this thread allocated successfully.
                    // RESTART, go to Branch 2.
                }
            }
        }

        prev_work_queue = work_queue;
    }

    return mask;
}

template <typename Key, typename Hash, typename Eq>
__device__ Pair<buf_index_t, bool> SlabHashBackendImpl<Key, Hash, Eq>::Find(
        bool lane_active,
        uint32_t lane_id,
        uint32_t bucket_id,
        const Key& query_key) {
    uint32_t work_queue = 0;
    uint32_t prev_work_queue = work_queue;
    uint32_t slab_ptr = kHeadSlabAddr;

    buf_index_t buf_index = kNullAddr;
    bool mask = false;

    // > Loop when we have active lanes.
    while ((work_queue = __ballot_sync(kSyncLanesMask, lane_active))) {
        // 0. Restart from linked list head if the last query is finished.
        slab_ptr = (prev_work_queue != work_queue) ? kHeadSlabAddr : slab_ptr;
        uint32_t src_lane = __ffs(work_queue) - 1;
        uint32_t src_bucket =
                __shfl_sync(kSyncLanesMask, bucket_id, src_lane, kWarpSize);

        Key src_key;
        WarpSyncKey(query_key, src_lane, src_key);

        // Each lane in the warp reads a unit in the slab in parallel.
        const uint32_t slab_entry =
                *SlabEntryPtr(src_bucket, lane_id, slab_ptr);

        int32_t lane_found = WarpFindKey(src_key, lane_id, slab_entry);

        // 1. Found in this slab, SUCCEED.
        if (lane_found >= 0) {
            // Broadcast the found value.
            uint32_t found_buf_index = __shfl_sync(kSyncLanesMask, slab_entry,
                                                   lane_found, kWarpSize);

            if (lane_id == src_lane) {
                lane_active = false;
                buf_index = found_buf_index;
                mask = true;
            }
        }

        // 2. Not found in this slab.
        else {
            // Broadcast the next slab pointer: lane 31 reads 'next'.
            uint32_t next_slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry,
                                                 kNextSlabPtrLaneId, kWarpSize);

            // 2.1. Next slab is empty, ABORT.
            if (next_slab_ptr == kEmptySlabAddr) {
                if (lane_id == src_lane) {
                    lane_active = false;
                }
            }
            // 2.2. Next slab exists, RESTART.
            else {
                slab_ptr = next_slab_ptr;
            }
        }

        prev_work_queue = work_queue;
    }

    return make_pair(buf_index, mask);
}

template <typename Key, typename Hash, typename Eq>
__device__ Pair<buf_index_t, bool> SlabHashBackendImpl<Key, Hash, Eq>::Erase(
        bool lane_active,
        uint32_t lane_id,
        uint32_t bucket_id,
        const Key& key) {
    uint32_t work_queue = 0;
    uint32_t prev_work_queue = 0;
    uint32_t slab_ptr = kHeadSlabAddr;
    Key src_key;

    buf_index_t buf_index = 0;
    bool mask = false;

    // > Loop when we have active lanes.
    while ((work_queue = __ballot_sync(kSyncLanesMask, lane_active))) {
        // 0. Restart from linked list head if the last erasure is finished.
        slab_ptr = (prev_work_queue != work_queue) ? kHeadSlabAddr : slab_ptr;
        uint32_t src_lane = __ffs(work_queue) - 1;
        uint32_t src_bucket =
                __shfl_sync(kSyncLanesMask, bucket_id, src_lane, kWarpSize);

        WarpSyncKey(key, src_lane, src_key);

        const uint32_t slab_entry =
                *SlabEntryPtr(src_bucket, lane_id, slab_ptr);

        int32_t lane_found = WarpFindKey(src_key, lane_id, slab_entry);

        // Branch 1: key found.
        if (lane_found >= 0) {
            if (lane_id == src_lane) {
                uint32_t* found_entry_ptr =
                        SlabEntryPtr(src_bucket, lane_found, slab_ptr);

                uint32_t old_found_entry_value = atomicExch(
                        (unsigned int*)found_entry_ptr, kEmptyNodeAddr);

                // Branch 1.2: another thread might have done the job already;
                // avoid a double free.
                mask = (old_found_entry_value != kEmptyNodeAddr);
                buf_index = old_found_entry_value;
            }
        } else {  // No matching slot found.
            uint32_t next_slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry,
                                                 kNextSlabPtrLaneId, kWarpSize);
            if (next_slab_ptr == kEmptySlabAddr) {
                // Not found, ABORT.
                if (lane_id == src_lane) {
                    lane_active = false;
                }
            } else {
                slab_ptr = next_slab_ptr;
            }
        }
        prev_work_queue = work_queue;
    }

    return make_pair(buf_index, mask);
}
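
// Note: Erase() only detaches the buf_index from its slab; the buffer entry
// itself is returned to the heap afterwards by EraseKernelPass1 via
// buffer_accessor_.DeviceFree().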

template <typename Key, typename Hash, typename Eq>
__device__ void SlabHashBackendImpl<Key, Hash, Eq>::WarpSyncKey(
        const Key& key, uint32_t lane_id, Key& ret_key) {
    auto dst_key_ptr = reinterpret_cast<int*>(&ret_key);
    auto src_key_ptr = reinterpret_cast<const int*>(&key);
    for (int i = 0; i < key_size_in_int_; ++i) {
        dst_key_ptr[i] =
                __shfl_sync(kSyncLanesMask, src_key_ptr[i], lane_id, kWarpSize);
    }
}

template <typename Key, typename Hash, typename Eq>
__device__ int32_t SlabHashBackendImpl<Key, Hash, Eq>::WarpFindKey(
        const Key& key, uint32_t lane_id, uint32_t slab_entry) {
    bool is_lane_found =
            // Select key lanes.
            ((1 << lane_id) & kNodePtrLanesMask)
            // Validate key buf_indices.
            && (slab_entry != kEmptyNodeAddr)
            // Find keys in the buffer. Now slab_entry is interpreted as a
            // buf_index.
            && eq_fn_(*static_cast<Key*>(
                              buffer_accessor_.GetKeyPtr(slab_entry)),
                      key);

    return __ffs(__ballot_sync(kNodePtrLanesMask, is_lane_found)) - 1;
}

template <typename Key, typename Hash, typename Eq>
__device__ int32_t SlabHashBackendImpl<Key, Hash, Eq>::WarpFindEmpty(
        uint32_t slab_entry) {
    bool is_lane_empty = (slab_entry == kEmptyNodeAddr);
    return __ffs(__ballot_sync(kNodePtrLanesMask, is_lane_empty)) - 1;
}

template <typename Key, typename Hash, typename Eq>
__device__ int64_t SlabHashBackendImpl<Key, Hash, Eq>::ComputeBucket(
        const Key& key) const {
    return hash_fn_(key) % bucket_count_;
}

template <typename Key, typename Hash, typename Eq>
__device__ uint32_t SlabHashBackendImpl<Key, Hash, Eq>::AllocateSlab(
        uint32_t lane_id) {
    return node_mgr_impl_.WarpAllocate(lane_id);
}

template <typename Key, typename Hash, typename Eq>
__device__ __forceinline__ void SlabHashBackendImpl<Key, Hash, Eq>::FreeSlab(
        uint32_t slab_ptr) {
    node_mgr_impl_.FreeUntouched(slab_ptr);
}

template <typename Key, typename Hash, typename Eq>
__global__ void InsertKernelPass0(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* input_keys,
                                  buf_index_t* output_buf_indices,
                                  int heap_counter_prev,
                                  int64_t count) {
    const Key* input_keys_templated = static_cast<const Key*>(input_keys);
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < count) {
        // First write ALL input_keys to avoid potential thread conflicts.
        buf_index_t buf_index =
                impl.buffer_accessor_.heap_[heap_counter_prev + tid];
        void* key = impl.buffer_accessor_.GetKeyPtr(buf_index);
        *static_cast<Key*>(key) = input_keys_templated[tid];
        output_buf_indices[tid] = buf_index;
    }
}
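
// Pass 0 only stages keys in the buffer and records their buf_indices;
// linking those buf_indices into the slab lists happens in InsertKernelPass1,
// so the key writes above cannot race with the warp-cooperative probing.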

template <typename Key, typename Hash, typename Eq>
__global__ void InsertKernelPass1(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* input_keys,
                                  buf_index_t* output_buf_indices,
                                  bool* output_masks,
                                  int64_t count) {
    const Key* input_keys_templated = static_cast<const Key*>(input_keys);
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    uint32_t lane_id = tid & 0x1F;

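    // tid - lane_id is the warp's base thread index: the warp exits only when
    // all 32 of its lanes are past `count`, because the warp-cooperative
    // calls below must execute with a full warp.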
    if (tid - lane_id >= count) {
        return;
    }

    impl.node_mgr_impl_.Init(tid, lane_id);

    bool lane_active = false;
    uint32_t bucket_id = 0;
    buf_index_t buf_index = 0;

    // Dummy for warp sync.
    Key key;
    if (tid < count) {
        lane_active = true;
        key = input_keys_templated[tid];
        buf_index = output_buf_indices[tid];
        bucket_id = impl.ComputeBucket(key);
    }

    // Index out-of-bound threads still have to run for warp synchronization.
    bool mask = impl.Insert(lane_active, lane_id, bucket_id, key, buf_index);

    if (tid < count) {
        output_masks[tid] = mask;
    }
}

template <typename Key, typename Hash, typename Eq, typename block_t>
__global__ void InsertKernelPass2(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                  const void* const* input_values_soa,
                                  buf_index_t* output_buf_indices,
                                  bool* output_masks,
                                  int64_t count,
                                  int64_t n_values) {
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < count) {
        buf_index_t buf_index = output_buf_indices[tid];

        if (output_masks[tid]) {
            for (int j = 0; j < n_values; ++j) {
                int64_t blocks_per_element =
                        impl.buffer_accessor_.value_blocks_per_element_[j];

                block_t* dst_value = static_cast<block_t*>(
                        impl.buffer_accessor_.GetValuePtr(buf_index, j));
                const block_t* src_value =
                        static_cast<const block_t*>(input_values_soa[j]) +
                        blocks_per_element * tid;
                for (int b = 0; b < blocks_per_element; ++b) {
                    dst_value[b] = src_value[b];
                }
            }
        } else {
            impl.buffer_accessor_.DeviceFree(buf_index);
        }
    }
}
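
// Values are copied as raw blocks of block_t in SoA order; keys rejected as
// duplicates in Pass 1 return their pre-allocated buf_index to the heap here
// via DeviceFree().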

template <typename Key, typename Hash, typename Eq>
__global__ void FindKernel(SlabHashBackendImpl<Key, Hash, Eq> impl,
                           const void* input_keys,
                           buf_index_t* output_buf_indices,
                           bool* output_masks,
                           int64_t count) {
    const Key* input_keys_templated = static_cast<const Key*>(input_keys);
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    uint32_t lane_id = threadIdx.x & 0x1F;

    // This warp is idle.
    if ((tid - lane_id) >= count) {
        return;
    }

    // Initialize the memory allocator on each warp.
    impl.node_mgr_impl_.Init(tid, lane_id);

    bool lane_active = false;
    uint32_t bucket_id = 0;

    // Dummy for warp sync.
    Key key;
    Pair<buf_index_t, bool> result;

    if (tid < count) {
        lane_active = true;
        key = input_keys_templated[tid];
        bucket_id = impl.ComputeBucket(key);
    }

    result = impl.Find(lane_active, lane_id, bucket_id, key);

    if (tid < count) {
        output_buf_indices[tid] = result.first;
        output_masks[tid] = result.second;
    }
}

template <typename Key, typename Hash, typename Eq>
__global__ void EraseKernelPass0(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                 const void* input_keys,
                                 buf_index_t* output_buf_indices,
                                 bool* output_masks,
                                 int64_t count) {
    const Key* input_keys_templated = static_cast<const Key*>(input_keys);
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    uint32_t lane_id = threadIdx.x & 0x1F;

    if (tid - lane_id >= count) {
        return;
    }

    impl.node_mgr_impl_.Init(tid, lane_id);

    bool lane_active = false;
    uint32_t bucket_id = 0;

    // Dummy for warp sync.
    Key key;
    if (tid < count) {
        lane_active = true;
        key = input_keys_templated[tid];
        bucket_id = impl.ComputeBucket(key);
    }

    auto result = impl.Erase(lane_active, lane_id, bucket_id, key);

    if (tid < count) {
        output_buf_indices[tid] = result.first;
        output_masks[tid] = result.second;
    }
}

template <typename Key, typename Hash, typename Eq>
__global__ void EraseKernelPass1(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                 buf_index_t* output_buf_indices,
                                 bool* output_masks,
                                 int64_t count) {
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid < count && output_masks[tid]) {
        impl.buffer_accessor_.DeviceFree(output_buf_indices[tid]);
    }
}

template <typename Key, typename Hash, typename Eq>
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl<Key, Hash, Eq> impl,
                                       buf_index_t* output_buf_indices,
                                       uint32_t* output_count) {
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    uint32_t lane_id = threadIdx.x & 0x1F;

    // Assign a warp per bucket.
    uint32_t bucket_id = tid >> 5;
    if (bucket_id >= impl.bucket_count_) {
        return;
    }

    impl.node_mgr_impl_.Init(tid, lane_id);

    // Scan the head slab.
    uint32_t slab_entry = *impl.SlabEntryPtrFromHead(bucket_id, lane_id);
    bool is_active = slab_entry != kEmptyNodeAddr;

    if (is_active && ((1 << lane_id) & kNodePtrLanesMask)) {
        uint32_t index = atomicAdd(output_count, 1);
        output_buf_indices[index] = slab_entry;
    }

    uint32_t slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry,
                                    kNextSlabPtrLaneId, kWarpSize);

    // Scan the following slabs.
    while (slab_ptr != kEmptySlabAddr) {
        slab_entry = *impl.SlabEntryPtrFromNodes(slab_ptr, lane_id);
        is_active = (slab_entry != kEmptyNodeAddr);

        if (is_active && ((1 << lane_id) & kNodePtrLanesMask)) {
            uint32_t index = atomicAdd(output_count, 1);
            output_buf_indices[index] = slab_entry;
        }
        slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry, kNextSlabPtrLaneId,
                               kWarpSize);
    }
}
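
// Note: active indices are appended with atomicAdd, so their order in
// output_buf_indices is nondeterministic across warps.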

template <typename Key, typename Hash, typename Eq>
__global__ void CountElemsPerBucketKernel(
        SlabHashBackendImpl<Key, Hash, Eq> impl, int64_t* bucket_elem_counts) {
    uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
    uint32_t lane_id = threadIdx.x & 0x1F;

    // Assign a warp per bucket.
    uint32_t bucket_id = tid >> 5;
    if (bucket_id >= impl.bucket_count_) {
        return;
    }

    impl.node_mgr_impl_.Init(tid, lane_id);

    uint32_t count = 0;

    // Count the head slab.
    uint32_t slab_entry = *impl.SlabEntryPtrFromHead(bucket_id, lane_id);
    count += __popc(
            __ballot_sync(kNodePtrLanesMask, slab_entry != kEmptyNodeAddr));
    uint32_t slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry,
                                    kNextSlabPtrLaneId, kWarpSize);

    // Count the following slabs.
    while (slab_ptr != kEmptySlabAddr) {
        slab_entry = *impl.SlabEntryPtrFromNodes(slab_ptr, lane_id);
        count += __popc(
                __ballot_sync(kNodePtrLanesMask, slab_entry != kEmptyNodeAddr));
        slab_ptr = __shfl_sync(kSyncLanesMask, slab_entry, kNextSlabPtrLaneId,
                               kWarpSize);
    }

    // Write back the results.
    if (lane_id == 0) {
        bucket_elem_counts[bucket_id] = count;
    }
}

}  // namespace core
}  // namespace open3d
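
For orientation, a host-side caller pairs these kernels per operation. The snippet below is a minimal launch sketch under stated assumptions, not the actual SlabHashBackend dispatch code: the 128-thread block size, the use of int for block_t, and the variable names (keys, values_soa, buf_indices, masks, heap_counter_prev, bucket_elem_counts) are all illustrative.

    // Hypothetical insert flow, one thread per key: Pass 0 stages the keys,
    // Pass 1 links their buf_indices into the slab lists, Pass 2 copies the
    // values (or frees the buf_index for duplicates).
    const int64_t threads = 128;
    const int64_t blocks = (count + threads - 1) / threads;
    InsertKernelPass0<Key, Hash, Eq><<<blocks, threads>>>(
            impl, keys, buf_indices, heap_counter_prev, count);
    InsertKernelPass1<Key, Hash, Eq><<<blocks, threads>>>(
            impl, keys, buf_indices, masks, count);
    InsertKernelPass2<Key, Hash, Eq, int><<<blocks, threads>>>(
            impl, values_soa, buf_indices, masks, count, n_values);

    // Warp-per-bucket kernels scale with the bucket count instead: each
    // bucket's slab list is scanned by one 32-lane warp.
    const int64_t warp_threads = impl.bucket_count_ * kWarpSize;
    CountElemsPerBucketKernel<Key, Hash, Eq>
            <<<(warp_threads + threads - 1) / threads, threads>>>(
                    impl, bucket_elem_counts);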