10#include <seqan3/search/views/minimiser_hash.hpp>
22template <
typename index_t>
25 constexpr bool is_ibf = std::same_as<index_t, raptor_index<index_structure::ibf>>
26 || std::same_as<index_t, raptor_index<index_structure::ibf_compressed>>;
28 double index_io_time{0.0};
29 double reads_io_time{0.0};
30 double compute_time{0.0};
32 auto cereal_worker = [&]()
36 auto cereal_handle = std::async(std::launch::async, cereal_worker);
38 seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::id, seqan3::field::seq>> fin{
40 using record_type =
typename decltype(fin)::record_type;
41 std::vector<record_type> records{};
48 for (
auto const & file_list : arguments.
bin_path)
52 line += std::to_string(position);
54 for (
auto const & filename : file_list)
63 synced_out <<
"#QUERY_NAME\tUSER_BINS\n";
68 auto worker = [&](
size_t const start,
size_t const end)
70 auto counter = [&index]()
73 return index.ibf().template counting_agent<uint16_t>();
75 return index.ibf().membership_agent();
77 std::string result_string{};
78 std::vector<uint64_t> minimiser;
80 auto hash_adaptor = seqan3::views::minimiser_hash(arguments.
shape,
81 seqan3::window_size{arguments.window_size},
82 seqan3::seed{adjust_seed(arguments.shape_weight)});
84 for (
auto && [
id, seq] : records | seqan3::views::slice(start, end))
86 result_string.clear();
88 result_string +=
'\t';
90 auto minimiser_view = seq | hash_adaptor | std::views::common;
91 minimiser.assign(minimiser_view.begin(), minimiser_view.end());
93 size_t const minimiser_count{minimiser.size()};
94 size_t const threshold = thresholder.get(minimiser_count);
98 auto & result = counter.bulk_count(minimiser);
99 size_t current_bin{0};
100 for (
auto && count : result)
102 if (count >= threshold)
104 result_string += std::to_string(current_bin);
105 result_string +=
',';
112 auto & result = counter.bulk_contains(minimiser, threshold);
113 for (
auto && count : result)
115 result_string += std::to_string(count);
116 result_string +=
',';
120 if (
auto & last_char = result_string.back(); last_char ==
',')
123 result_string +=
'\n';
124 synced_out.write(result_string);
128 for (
auto && chunked_records : fin | seqan3::views::chunk((1ULL << 20) * 10))
131 auto start = std::chrono::high_resolution_clock::now();
132 std::ranges::move(chunked_records, std::back_inserter(records));
133 auto end = std::chrono::high_resolution_clock::now();
134 reads_io_time += std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
136 cereal_handle.wait();
144 std::filesystem::path file_path{arguments.
out_file};
145 file_path +=
".time";
146 std::ofstream file_handle{file_path};
147 file_handle <<
"Index I/O\tReads I/O\tCompute\n";
148 file_handle << std::fixed << std::setprecision(2) << index_io_time <<
'\t' << reads_io_time <<
'\t'
Definition: sync_out.hpp:18
Definition: threshold.hpp:17
Definition: adjust_seed.hpp:13
void do_parallel(algorithm_t &&worker, size_t const num_records, size_t const threads, double &compute_time)
Definition: do_parallel.hpp:18
void search_single(search_arguments const &arguments, index_t &&index)
Definition: search_single.hpp:23
void load_index(index_t &index, search_arguments const &arguments, size_t const part, double &index_io_time)
Definition: load_index.hpp:19
Definition: search_arguments.hpp:27
uint8_t threads
Definition: search_arguments.hpp:33
raptor::threshold::threshold_parameters make_threshold_parameters() const noexcept
Definition: search_arguments.hpp:58
seqan3::shape shape
Definition: search_arguments.hpp:30
std::filesystem::path query_file
Definition: search_arguments.hpp:51
std::vector< std::vector< std::string > > bin_path
Definition: search_arguments.hpp:50
bool write_time
Definition: search_arguments.hpp:53
std::filesystem::path out_file
Definition: search_arguments.hpp:52