12#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
14#ifndef RAPTOR_HIBF_HAS_COUNT
15# define RAPTOR_HIBF_HAS_COUNT 0
76template <seqan3::data_layout data_layout_mode_ = seqan3::data_layout::uncompressed>
86#if RAPTOR_HIBF_HAS_COUNT
88 template <std::
integral value_t>
96 using ibf_t = seqan3::interleaved_bloom_filter<data_layout_mode_>;
133#if RAPTOR_HIBF_HAS_COUNT
137 template <std::
integral value_t = u
int16_t>
138 counting_agent_type<value_t> counting_agent()
const
140 return counting_agent_type<value_t>{*
this};
151 template <seqan3::cereal_archive archive_t>
152 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
163template <seqan3::data_layout data_layout_mode>
168 std::vector<std::string> user_bin_filenames;
176 std::vector<std::vector<int64_t>> ibf_bin_to_filename_position{};
182 return user_bin_filenames.size();
188 ibf_bin_to_filename_position.resize(size);
194 user_bin_filenames.resize(size);
200 return ibf_bin_to_filename_position[idx];
206 return user_bin_filenames[idx];
210 std::string
const &
operator[](std::pair<size_t, size_t>
const & index_pair)
const
212 return user_bin_filenames[ibf_bin_to_filename_position[index_pair.first][index_pair.second]];
220 return ibf_bin_to_filename_position[ibf_idx]
221 | std::views::transform(
225 return std::string{};
227 return user_bin_filenames[i];
234 return ibf_bin_to_filename_position[ibf_idx][bin_idx];
242 template <
typename stream_t>
247 for (
auto const & filename : user_bin_filenames)
251 line += std::to_string(position);
267 template <
typename archive_t>
268 void serialize(archive_t & archive)
270 archive(user_bin_filenames);
271 archive(ibf_bin_to_filename_position);
281template <seqan3::data_layout data_layout_mode>
289 hibf_t const *
const hibf_ptr{
nullptr};
292 template <std::ranges::forward_range value_range_t>
293 void bulk_contains_impl(value_range_t && values, int64_t
const ibf_idx,
size_t const threshold)
295 auto agent = hibf_ptr->
ibf_vector[ibf_idx].template counting_agent<uint16_t>();
296 auto & result = agent.bulk_count(values);
300 for (
size_t bin{}; bin < result.size(); ++bin)
304 auto const current_filename_index = hibf_ptr->user_bins.filename_index(ibf_idx, bin);
306 if (current_filename_index < 0)
308 if (sum >= threshold)
309 bulk_contains_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
312 else if (bin + 1u == result.size() ||
313 current_filename_index != hibf_ptr->user_bins.filename_index(ibf_idx, bin + 1))
315 if (sum >= threshold)
316 result_buffer.emplace_back(current_filename_index);
361 template <std::ranges::forward_range value_range_t>
362 [[nodiscard]] std::vector<int64_t>
const &
bulk_contains(value_range_t && values,
size_t const threshold) &
noexcept
364 assert(hibf_ptr !=
nullptr);
366 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
367 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
368 "An individual value must be an unsigned integral.");
370 result_buffer.clear();
372 bulk_contains_impl(values, 0, threshold);
374 std::ranges::sort(result_buffer);
376 return result_buffer;
381 template <std::ranges::range value_range_t>
382 [[nodiscard]] std::vector<int64_t>
const &
bulk_contains(value_range_t && values,
383 size_t const threshold) &&
noexcept =
delete;
387#if RAPTOR_HIBF_HAS_COUNT
390template <seqan3::data_layout data_layout_mode>
391template <std::
integral value_t>
399 hibf_t const *
const hibf_ptr{
nullptr};
402 template <std::ranges::forward_range value_range_t>
403 void bulk_count_impl(value_range_t && values, int64_t
const ibf_idx,
size_t const threshold)
405 auto agent = hibf_ptr->
ibf_vector[ibf_idx].template counting_agent<value_t>();
406 auto & result = agent.bulk_count(values);
410 for (
size_t bin{}; bin < result.size(); ++bin)
413 auto const current_filename_index = hibf_ptr->user_bins.filename_index(ibf_idx, bin);
415 if (current_filename_index < 0)
417 if (sum >= threshold)
418 bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
421 else if (bin + 1u == result.size() ||
422 current_filename_index != hibf_ptr->user_bins.filename_index(ibf_idx, bin + 1))
424 if (sum >= threshold)
425 result_buffer[current_filename_index] = sum;
447 hibf_ptr(std::addressof(
hibf)),
448 result_buffer(hibf_ptr->
user_bins.num_user_bins())
474 template <std::ranges::forward_range value_range_t>
475 [[nodiscard]] seqan3::counting_vector<value_t>
const &
bulk_count(value_range_t && values,
476 size_t const threshold = 1u) &
noexcept
478 assert(hibf_ptr !=
nullptr);
479 assert(threshold > 0u);
480 assert(result_buffer.size() == hibf_ptr->user_bins.num_user_bins());
482 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
483 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
484 "An individual value must be an unsigned integral.");
486 std::ranges::fill(result_buffer,
static_cast<value_t
>(0u));
488 bulk_count_impl(values, 0, threshold);
490 return result_buffer;
495 template <std::ranges::range value_range_t>
496 [[nodiscard]] seqan3::counting_vector<value_t>
const &
bulk_count(value_range_t && values,
497 size_t const threshold = 1u) &&
noexcept =
delete;
Manages counting ranges of values for the hibf::hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:393
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type &&)=default
Defaulted.
seqan3::counting_vector< value_t > const & bulk_count(value_range_t &&values, size_t const threshold=1u) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: hierarchical_interleaved_bloom_filter.hpp:475
seqan3::counting_vector< value_t > const & bulk_count(value_range_t &&values, size_t const threshold=1u) &&noexcept=delete
counting_agent_type(counting_agent_type const &)=default
Defaulted.
~counting_agent_type()=default
Defaulted.
counting_agent_type()=default
Defaulted.
seqan3::counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: hierarchical_interleaved_bloom_filter.hpp:453
Manages membership queries for the hibf::hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:283
membership_agent & operator=(membership_agent const &)=default
Defaulted.
membership_agent(membership_agent const &)=default
Defaulted.
membership_agent & operator=(membership_agent &&)=default
Defaulted.
std::vector< int64_t > const & bulk_contains(value_range_t &&values, size_t const threshold) &noexcept
Determines set membership of given values, and returns the user bin indices of occurrences.
Definition: hierarchical_interleaved_bloom_filter.hpp:362
~membership_agent()=default
Defaulted.
std::vector< int64_t > result_buffer
Stores the result of bulk_contains().
Definition: hierarchical_interleaved_bloom_filter.hpp:342
membership_agent()=default
Defaulted.
std::vector< int64_t > const & bulk_contains(value_range_t &&values, size_t const threshold) &&noexcept=delete
membership_agent(membership_agent &&)=default
Defaulted.
Bookkeeping for user and technical bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:165
int64_t filename_index(size_t const ibf_idx, size_t const bin_idx) const
Returns the filename index of the ibf_idxth IBF for bin bin_idx.
Definition: hierarchical_interleaved_bloom_filter.hpp:232
void set_ibf_count(size_t const size)
Changes the number of managed IBFs.
Definition: hierarchical_interleaved_bloom_filter.hpp:186
void set_user_bin_count(size_t const size)
Changes the number of managed user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:192
auto operator[](size_t const ibf_idx) const
Returns a view over the user bin filenames for the ibf_idxth IBF. An empty string is returned for mer...
Definition: hierarchical_interleaved_bloom_filter.hpp:218
void write_filenames(stream_t &out_stream) const
Writes all filenames to a stream. Index and filename are tab-separated.
Definition: hierarchical_interleaved_bloom_filter.hpp:243
std::string const & operator[](std::pair< size_t, size_t > const &index_pair) const
For a pair (a,b), returns a const reference to the filename of the user bin at IBF a,...
Definition: hierarchical_interleaved_bloom_filter.hpp:210
std::string & filename_of_user_bin(size_t const idx)
Returns the filename of the idxth user bin.
Definition: hierarchical_interleaved_bloom_filter.hpp:204
size_t num_user_bins() const noexcept
Returns the number of managed user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:180
std::vector< int64_t > & bin_indices_of_ibf(size_t const idx)
Returns a vector containing user bin indices for each bin in the idxth IBF.
Definition: hierarchical_interleaved_bloom_filter.hpp:198
The HIBF binning directory. A data structure that efficiently answers set-membership queries for mult...
Definition: hierarchical_interleaved_bloom_filter.hpp:78
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
~hierarchical_interleaved_bloom_filter()=default
Defaulted.
std::vector< ibf_t > ibf_vector
The individual interleaved Bloom filters.
Definition: hierarchical_interleaved_bloom_filter.hpp:113
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
membership_agent membership_agent() const
Returns a membership_agent to be used for counting.
Definition: hierarchical_interleaved_bloom_filter.hpp:128
static constexpr seqan3::data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: hierarchical_interleaved_bloom_filter.hpp:93
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
std::vector< std::vector< int64_t > > next_ibf_id
Stores for each bin in each IBF of the HIBF the ID of the next IBF.
Definition: hierarchical_interleaved_bloom_filter.hpp:122
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
seqan3::interleaved_bloom_filter< data_layout_mode_ > ibf_t
The type of an individual Bloom filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:96
hierarchical_interleaved_bloom_filter()=default
Defaulted.
user_bins user_bins
The underlying user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:125
hierarchical_interleaved_bloom_filter< seqan3::data_layout::uncompressed > hibf
Definition: index.hpp:24
Definition: adjust_seed.hpp:13