fastq_to_fasta
A template for creation of SeqAn3 apps, with a FASTQ to FASTA example app.
forward_strand_minimiser.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
8#pragma once
9
10#include <deque>
11
12#include <seqan3/alphabet/nucleotide/dna4.hpp>
13#include <seqan3/search/views/kmer_hash.hpp>
14
17
19{
20
21// Minimiser without looking at reverse complement
23{
24private:
26 uint64_t window_size{};
28 seqan3::shape shape{};
30 uint8_t shape_size{};
32 uint64_t seed{};
34 std::vector<uint64_t> forward_hashes{};
35
36public:
38 std::vector<uint64_t> minimiser_begin;
39
46
52 forward_strand_minimiser(window const window_size_,
53 seqan3::shape const shape_,
54 uint64_t const seed_ = 0x8F3F73B5CF1C9ADE) :
55 window_size{window_size_.v},
56 shape{shape_},
57 shape_size{shape.size()},
58 seed{adjust_seed(shape.count(), seed_)}
59 {
60 assert(window_size >= shape_size);
61 }
62
68 void resize(window const window_size_, seqan3::shape const shape_, uint64_t const seed_ = 0x8F3F73B5CF1C9ADE)
69 {
70 window_size = window_size_.v;
71 shape = shape_;
72 shape_size = shape.size();
73 seed = adjust_seed(shape.count(), seed_);
74 assert(window_size >= shape_size);
75 }
76
77 void compute(std::vector<seqan3::dna4> const & text)
78 {
79 assert(window_size && shape_size && seed); // Forgot to initialise/resize?
80
81 size_t const text_length = text.size();
82 assert(shape_size <= text_length);
83 assert(window_size <= text_length);
84
85 uint64_t const max_number_of_minimiser = text_length - window_size + 1u;
86 uint64_t const kmers_per_window = window_size - shape_size + 1u;
87
88 minimiser_begin.clear();
89 minimiser_begin.reserve(max_number_of_minimiser);
90
91 // Compute all k-mer hashes.
92 auto apply_xor = [this](uint64_t const value)
93 {
94 return value ^ seed;
95 };
96 auto kmer_view = text | seqan3::views::kmer_hash(shape) | std::views::transform(apply_xor);
97 forward_hashes.assign(kmer_view.begin(), kmer_view.end());
98
99 // Stores hash and begin for all k-mers in the window
100 std::deque<std::pair<uint64_t, uint64_t>> window_hashes;
101
102 // Initialisation. We need to compute all hashes for the first window.
103 for (uint64_t i = 0; i < kmers_per_window; ++i)
104 window_hashes.emplace_back(forward_hashes[i], i);
105
106 // The minimum hash is the minimiser. Store the begin position.
107 auto min = std::min_element(std::begin(window_hashes), std::end(window_hashes));
108 minimiser_begin.push_back(min->second);
109
110 // For the following windows, we remove the first window k-mer (is now not in window) and add the new k-mer
111 // that results from the window shifting.
112 for (uint64_t i = kmers_per_window; i < max_number_of_minimiser; ++i)
113 {
114 // Already store the new hash without removing the first one.
115 uint64_t const new_hash{forward_hashes[i + kmers_per_window - 1]}; // Already did kmers_per_window - 1 many
116 window_hashes.emplace_back(new_hash, i);
117
118 if (new_hash < min->second) // New hash is the minimum.
119 {
120 min = std::prev(std::end(window_hashes));
121 minimiser_begin.push_back(min->second);
122 }
123 else if (min == std::begin(window_hashes)) // Minimum is the yet-to-be-removed begin of the window.
124 {
125 // The first hash will be removed, the last one is caught by the previous if.
126 min = std::min_element(++std::begin(window_hashes), std::prev(std::end(window_hashes)));
127 minimiser_begin.push_back(min->second);
128 }
129
130 window_hashes.pop_front(); // Remove the first k-mer.
131 }
132 return;
133 }
134};
135
136} // namespace raptor::threshold
Definition: forward_strand_minimiser.hpp:19
Definition: forward_strand_minimiser.hpp:23
forward_strand_minimiser(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser const &)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Constructs a minimiser from given k-mer, window size and a seed.
Definition: forward_strand_minimiser.hpp:52
std::vector< uint64_t > minimiser_begin
Stores the begin positions of the minimisers.
Definition: forward_strand_minimiser.hpp:38
forward_strand_minimiser(forward_strand_minimiser const &)=default
Defaulted.
void resize(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Resize the minimiser.
Definition: forward_strand_minimiser.hpp:68
void compute(std::vector< seqan3::dna4 > const &text)
Definition: forward_strand_minimiser.hpp:77
Strong type for passing the window size.
Definition: strong_types.hpp:17
uint32_t v
Definition: strong_types.hpp:18