Open3D (C++ API)  0.17.0
Loading...
Searching...
No Matches
SlabHashBackend.h
Go to the documentation of this file.
1// ----------------------------------------------------------------------------
2// - Open3D: www.open3d.org -
3// ----------------------------------------------------------------------------
4// Copyright (c) 2018-2023 www.open3d.org
5// SPDX-License-Identifier: MIT
6// ----------------------------------------------------------------------------
7
8#pragma once
9
10#include <memory>
11
16
17namespace open3d {
18namespace core {
19template <typename Key, typename Hash, typename Eq>
21public:
22 SlabHashBackend(int64_t init_capacity,
23 int64_t key_dsize,
24 const std::vector<int64_t>& value_dsizes,
25 const Device& device);
26
28
29 void Reserve(int64_t capacity) override;
30
31 void Insert(const void* input_keys,
32 const std::vector<const void*>& input_values_soa,
33 buf_index_t* output_buf_indices,
34 bool* output_masks,
35 int64_t count) override;
36
37 void Find(const void* input_keys,
38 buf_index_t* output_buf_indices,
39 bool* output_masks,
40 int64_t count) override;
41
42 void Erase(const void* input_keys,
43 bool* output_masks,
44 int64_t count) override;
45
46 int64_t GetActiveIndices(buf_index_t* output_indices) override;
47 void Clear() override;
48
49 int64_t Size() const override;
50 int64_t GetBucketCount() const override;
51 std::vector<int64_t> BucketSizes() const override;
52 float LoadFactor() const override;
53
55
56 void Allocate(int64_t capacity) override;
57 void Free() override;
58
59protected:
63
65 std::shared_ptr<SlabNodeManager> node_mgr_;
66
68};
69
70template <typename Key, typename Hash, typename Eq>
72 int64_t init_capacity,
73 int64_t key_dsize,
74 const std::vector<int64_t>& value_dsizes,
75 const Device& device)
76 : DeviceHashBackend(init_capacity, key_dsize, value_dsizes, device) {
77 CUDAScopedDevice scoped_device(this->device_);
78 Allocate(init_capacity);
79}
80
81template <typename Key, typename Hash, typename Eq>
83 CUDAScopedDevice scoped_device(this->device_);
84 Free();
85}
86
87template <typename Key, typename Hash, typename Eq>
89 CUDAScopedDevice scoped_device(this->device_);
90}
91
92template <typename Key, typename Hash, typename Eq>
93void SlabHashBackend<Key, Hash, Eq>::Find(const void* input_keys,
94 buf_index_t* output_buf_indices,
95 bool* output_masks,
96 int64_t count) {
97 CUDAScopedDevice scoped_device(this->device_);
98 if (count == 0) return;
99
100 OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
102 OPEN3D_CUDA_CHECK(cudaGetLastError());
103
104 const int64_t num_blocks =
105 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
106 FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
107 impl_, input_keys, output_buf_indices, output_masks, count);
109 OPEN3D_CUDA_CHECK(cudaGetLastError());
110}
111
112template <typename Key, typename Hash, typename Eq>
113void SlabHashBackend<Key, Hash, Eq>::Erase(const void* input_keys,
114 bool* output_masks,
115 int64_t count) {
116 CUDAScopedDevice scoped_device(this->device_);
117 if (count == 0) return;
118
119 OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
121 OPEN3D_CUDA_CHECK(cudaGetLastError());
122 auto buf_indices = static_cast<buf_index_t*>(
123 MemoryManager::Malloc(sizeof(buf_index_t) * count, this->device_));
124
125 const int64_t num_blocks =
126 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
127 EraseKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
128 core::cuda::GetStream()>>>(
129 impl_, input_keys, buf_indices, output_masks, count);
130 EraseKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
131 core::cuda::GetStream()>>>(impl_, buf_indices,
132 output_masks, count);
134 OPEN3D_CUDA_CHECK(cudaGetLastError());
135
136 MemoryManager::Free(buf_indices, this->device_);
137}
138
139template <typename Key, typename Hash, typename Eq>
141 buf_index_t* output_buf_indices) {
142 CUDAScopedDevice scoped_device(this->device_);
143 uint32_t* count = static_cast<uint32_t*>(
144 MemoryManager::Malloc(sizeof(uint32_t), this->device_));
145 OPEN3D_CUDA_CHECK(cudaMemset(count, 0, sizeof(uint32_t)));
146
148 OPEN3D_CUDA_CHECK(cudaGetLastError());
149
150 const int64_t num_blocks =
151 (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
152 kThreadsPerBlock;
153 GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock, 0,
154 core::cuda::GetStream()>>>(
155 impl_, output_buf_indices, count);
157 OPEN3D_CUDA_CHECK(cudaGetLastError());
158
159 uint32_t ret;
160 MemoryManager::MemcpyToHost(&ret, count, this->device_, sizeof(uint32_t));
161 MemoryManager::Free(count, this->device_);
162
163 return static_cast<int64_t>(ret);
164}
165
166template <typename Key, typename Hash, typename Eq>
168 CUDAScopedDevice scoped_device(this->device_);
169 // Clear the heap
170 this->buffer_->ResetHeap();
171
172 // Clear the linked list heads
173 OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
174 sizeof(Slab) * this->bucket_count_));
176 OPEN3D_CUDA_CHECK(cudaGetLastError());
177
178 // Clear the linked list nodes
179 node_mgr_->Reset();
180}
181
182template <typename Key, typename Hash, typename Eq>
184 CUDAScopedDevice scoped_device(this->device_);
185 return this->buffer_->GetHeapTopIndex();
186}
187
188template <typename Key, typename Hash, typename Eq>
190 CUDAScopedDevice scoped_device(this->device_);
191 return bucket_count_;
192}
193
194template <typename Key, typename Hash, typename Eq>
196 CUDAScopedDevice scoped_device(this->device_);
197 thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
198 thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
199
200 const int64_t num_blocks =
201 (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
202 kThreadsPerBlock;
203 CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0,
204 core::cuda::GetStream()>>>(
205 impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
207 OPEN3D_CUDA_CHECK(cudaGetLastError());
208
209 std::vector<int64_t> result(impl_.bucket_count_);
210 thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
211 result.begin());
212 return result;
213}
214
215template <typename Key, typename Hash, typename Eq>
217 CUDAScopedDevice scoped_device(this->device_);
218 return float(Size()) / float(this->bucket_count_);
219}
220
221template <typename Key, typename Hash, typename Eq>
223 const void* input_keys,
224 const std::vector<const void*>& input_values_soa,
225 buf_index_t* output_buf_indices,
226 bool* output_masks,
227 int64_t count) {
228 CUDAScopedDevice scoped_device(this->device_);
229 if (count == 0) return;
230
233 int prev_heap_top = this->buffer_->GetHeapTopIndex();
234 *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_top_) =
235 prev_heap_top + count;
236
237 const int64_t num_blocks =
238 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
239 InsertKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
240 core::cuda::GetStream()>>>(
241 impl_, input_keys, output_buf_indices, prev_heap_top, count);
242 InsertKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
243 core::cuda::GetStream()>>>(
244 impl_, input_keys, output_buf_indices, output_masks, count);
245
246 thrust::device_vector<const void*> input_values_soa_device(
247 input_values_soa.begin(), input_values_soa.end());
248
249 int64_t n_values = input_values_soa.size();
250 const void* const* ptr_input_values_soa =
251 thrust::raw_pointer_cast(input_values_soa_device.data());
252 DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
253 impl_.buffer_accessor_.common_block_size_, [&]() {
254 InsertKernelPass2<Key, Hash, Eq, block_t>
255 <<<num_blocks, kThreadsPerBlock, 0,
256 core::cuda::GetStream()>>>(
257 impl_, ptr_input_values_soa, output_buf_indices,
258 output_masks, count, n_values);
259 });
261 OPEN3D_CUDA_CHECK(cudaGetLastError());
262}
263
264template <typename Key, typename Hash, typename Eq>
266 CUDAScopedDevice scoped_device(this->device_);
267 this->bucket_count_ = capacity * 2;
268 this->capacity_ = capacity;
269
270 // Allocate buffer for key values.
271 this->buffer_ = std::make_shared<HashBackendBuffer>(
272 this->capacity_, this->key_dsize_, this->value_dsizes_,
273 this->device_);
274 buffer_accessor_.Setup(*this->buffer_);
275
276 // Allocate buffer for linked list nodes.
277 node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);
278
279 // Allocate linked list heads.
280 impl_.bucket_list_head_ = static_cast<Slab*>(MemoryManager::Malloc(
281 sizeof(Slab) * this->bucket_count_, this->device_));
282 OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
283 sizeof(Slab) * this->bucket_count_));
285 OPEN3D_CUDA_CHECK(cudaGetLastError());
286
287 impl_.Setup(this->bucket_count_, node_mgr_->impl_, buffer_accessor_);
288}
289
290template <typename Key, typename Hash, typename Eq>
292 CUDAScopedDevice scoped_device(this->device_);
293 buffer_accessor_.Shutdown(this->device_);
294 MemoryManager::Free(impl_.bucket_list_head_, this->device_);
295}
296} // namespace core
297} // namespace open3d
Common CUDA utilities.
#define OPEN3D_CUDA_CHECK(err)
Definition CUDAUtils.h:47
core::Tensor result
Definition VtkUtils.cpp:75
Definition CUDAHashBackendBufferAccessor.h:24
When CUDA is not enabled, this is a dummy class.
Definition CUDAUtils.h:214
Definition DeviceHashBackend.h:20
Device device_
Definition DeviceHashBackend.h:100
Definition Device.h:18
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition MemoryManager.cpp:85
static void * Malloc(size_t byte_size, const Device &device)
Definition MemoryManager.cpp:22
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition MemoryManager.cpp:28
Definition SlabHashBackend.h:20
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition SlabHashBackend.h:71
void Free() override
Definition SlabHashBackend.h:291
CUDAHashBackendBufferAccessor buffer_accessor_
Definition SlabHashBackend.h:64
void Allocate(int64_t capacity) override
Definition SlabHashBackend.h:265
~SlabHashBackend()
Definition SlabHashBackend.h:82
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition SlabHashBackend.h:216
std::shared_ptr< SlabNodeManager > node_mgr_
Definition SlabHashBackend.h:65
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition SlabHashBackend.h:140
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition SlabHashBackend.h:62
void Insert(const void *input_keys, const std::vector< const void * > &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition SlabHashBackend.h:222
int64_t bucket_count_
Definition SlabHashBackend.h:67
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition SlabHashBackend.h:183
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition SlabHashBackend.h:189
void Reserve(int64_t capacity) override
Definition SlabHashBackend.h:88
void Clear() override
Clear stored map without reallocating memory.
Definition SlabHashBackend.h:167
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition SlabHashBackend.h:195
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition SlabHashBackend.h:54
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition SlabHashBackend.h:93
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition SlabHashBackend.h:113
Definition SlabHashBackendImpl.h:45
Definition SlabNodeManager.h:39
int count
Definition FilePCD.cpp:42
void Synchronize()
Definition CUDAUtils.cpp:58
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition SlabHashBackendImpl.h:493
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition SlabHashBackendImpl.h:474
uint32_t buf_index_t
Definition HashBackendBuffer.h:44
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition SlabHashBackendImpl.h:636
__global__ void FindKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition SlabHashBackendImpl.h:562
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition SlabHashBackendImpl.h:601
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition SlabHashBackendImpl.h:647
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition SlabHashBackendImpl.h:687
Definition PinholeCameraIntrinsic.cpp:16