Needle
An application for fast and efficient searches of NGS data.
ibf.h
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/needle/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
8#pragma once
9
10#include <iostream>
11#include <math.h>
12#include <numeric>
13#include <string>
14
15#include <seqan3/alphabet/container/concatenated_sequences.hpp>
16#include <seqan3/alphabet/nucleotide/dna4.hpp>
17#include <filesystem>
18
19#include "shared.h"
20
22{
23 std::filesystem::path include_file; // Needs to be defined when only minimisers appearing in this file should be stored
24 std::filesystem::path exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
25 std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
26 bool paired = false; // If true, then experiments are seen as paired-end experiments
27 bool experiment_names = false; // If true, the names of the experiments should be stored in a txt file
28};
29
32 int maxi; // Modulus applied to rand() in operator(); set once by the constructor
33 RandomGenerator(int max) : // Stores max in maxi as-is; no validation is performed here
34 maxi(max) {
35 }
36
37 int operator()() { // Draws the next value from the C library generator rand()
38 return rand() % maxi; // For a positive maxi the result lies in [0, maxi); maxi == 0 would be a division by zero
39 }
40};
41
// Get the concrete expression values (= median of all counts of one transcript) for the given experiments.
// sequence_files, genome_file and exclude_file are taken by value, so every call copies them.
50void count(min_arguments const & args, std::vector<std::filesystem::path> sequence_files, std::filesystem::path genome_file,
51 std::filesystem::path exclude_file, bool paired);
52
// Reads a binary file that needle minimiser creates.
// hash_table is a non-const reference (uint64_t keys, uint16_t values); presumably it receives the
// file's contents -- TODO confirm against the definition in ibf.cpp.
58void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<uint64_t, uint16_t> & hash_table);
59
// Reads the beginning of a binary file that needle minimiser creates.
// args, num_of_minimisers and cutoff are non-const references; presumably they are filled from the
// start of the file -- TODO confirm against the definition in ibf.cpp.
66void read_binary_start(min_arguments & args, std::filesystem::path filename, uint64_t & num_of_minimisers, uint8_t & cutoff);
67
// Creates IBFs for the given sequence_files.
// ibf_args, minimiser_args, fpr and cutoffs are passed by non-const reference.
// expression_by_genome_file defaults to an empty path and num_hash defaults to 1.
// NOTE(review): the meaning of the returned std::vector<uint16_t> is not visible in this header.
80std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files, estimate_ibf_arguments & ibf_args,
81 minimiser_arguments & minimiser_args, std::vector<double> & fpr, std::vector<uint8_t> & cutoffs,
82 std::filesystem::path const expression_by_genome_file = "",
83 size_t num_hash = 1);
84
// Overload of ibf() that takes minimiser_files instead of sequence_files and has no
// minimiser_arguments / cutoffs parameters.
// expression_by_genome_file defaults to an empty path and num_hash defaults to 1.
// NOTE(review): presumably builds the IBFs from precomputed minimiser files -- confirm in ibf.cpp.
95std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
96 estimate_ibf_arguments & ibf_args, std::vector<double> & fpr,
97 std::filesystem::path const expression_by_genome_file = "",
98 size_t num_hash = 1);
99
// Create minimiser and header files for the given sequence_files.
// minimiser_args and cutoffs are passed by non-const reference; args is read-only.
106void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_arguments const & args,
107 minimiser_arguments & minimiser_args, std::vector<uint8_t> & cutoffs);
void minimiser(std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs)
Create minimiser and header files.
Definition: ibf.cpp:794
void count(min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path genome_file, std::filesystem::path exclude_file, bool paired)
Get the concrete expression values (= median of all counts of one transcript) for given experiments....
Definition: ibf.cpp:143
void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table)
Reads a binary file that needle minimiser creates.
Definition: ibf.cpp:200
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition: ibf.cpp:672
void read_binary_start(min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff)
Reads the beginning of a binary file that needle minimiser creates.
Definition: ibf.cpp:232
Generates a pseudo-random integer strictly smaller than a given (positive) maximum, computed as rand() % max.
Definition: ibf.h:31
int maxi
Definition: ibf.h:32
RandomGenerator(int max)
Definition: ibf.h:33
int operator()()
Definition: ibf.h:37
arguments used for estimate, ibf, ibfmin
Definition: shared.h:41
arguments used for estimate, ibf, minimiser
Definition: shared.h:32
Definition: ibf.h:22
bool paired
Definition: ibf.h:26
std::filesystem::path include_file
Definition: ibf.h:23
std::vector< int > samples
Definition: ibf.h:25
std::filesystem::path exclude_file
Definition: ibf.h:24
bool experiment_names
Definition: ibf.h:27