SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
sam_tag_dictionary.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <concepts>
16 #include <map>
17 #include <variant>
18 
22 
23 namespace seqan3::detail
24 {
27 using sam_tag_variant = std::variant<char,
28  int32_t,
29  float,
39 
//!\brief SAM type character for each supported tag value type.
//!\details Entries 0-2 ('A', 'i', 'f') line up with the `char`, `int32_t` and `float` alternatives of
//!         `sam_tag_variant`; presumably every index i matches the i-th variant alternative, but the rest of the
//!         variant is not visible here - TODO confirm. Entries 5-11 all hold 'B'; for exactly those indices
//!         `sam_tag_type_char_extra` holds a non-NUL sub-type character.
//!         Declared `inline` so that all translation units including this header share a single definition
//!         instead of each getting its own internal-linkage copy.
inline constexpr char sam_tag_type_char[12] = {'A', 'i', 'f', 'Z', 'H', 'B', 'B', 'B', 'B', 'B', 'B', 'B'};

//!\brief Additional sub-type character for each entry of `sam_tag_type_char`.
//!\details Uses the same indexing as `sam_tag_type_char`. Entries 0-4 are '\0' (no sub-type); entries 5-11 hold
//!         the sub-type characters 'c', 'C', 's', 'S', 'i', 'I', 'f' that accompany the 'B' entries.
inline constexpr char sam_tag_type_char_extra[12] = {'\0', '\0', '\0', '\0', '\0', 'c', 'C', 's', 'S', 'i', 'I', 'f'};
46 } // namespace seqan3::detail
47 
48 namespace seqan3
49 {
50 
51 inline namespace literals
52 {
53 
74 #ifdef __cpp_nontype_template_parameter_class
75 template <small_string<2> str> // TODO: better handling if too large string is provided?
76 constexpr uint16_t operator""_tag()
77 {
78 #else // GCC/Clang extension
79 # pragma GCC diagnostic push
80 # pragma GCC diagnostic ignored "-Wpedantic"
81 template <typename char_t, char_t... s>
82 constexpr uint16_t operator""_tag()
83 {
84  static_assert(std::same_as<char_t, char>, "Illegal SAM tag: Type must be char.");
85  constexpr small_string<sizeof...(s)> str{std::array<char, sizeof...(s)>{s...}};
86 # pragma GCC diagnostic pop
87 #endif
88 
89  static_assert(str.size() == 2, "Illegal SAM tag: Exactly two characters must be given.");
90 
91  constexpr char char0 = str[0];
92  constexpr char char1 = str[1];
93 
94  static_assert((is_alpha(char0) && is_alnum(char1)), "Illegal SAM tag: a SAM tag must match /[A-Za-z][A-Za-z0-9]/.");
95 
96  return static_cast<uint16_t>(char0) * 256 + static_cast<uint16_t>(char1);
97 }
99 
100 } // namespace literals
101 
179 template <uint16_t tag_value>
181 {
184 };
185 
188 template <uint16_t tag_value>
190 
191 // clang-format off
193 template <> struct sam_tag_type<"AM"_tag> { using type = int32_t; };
194 template <> struct sam_tag_type<"AS"_tag> { using type = int32_t; };
195 template <> struct sam_tag_type<"BC"_tag> { using type = std::string; };
196 template <> struct sam_tag_type<"BQ"_tag> { using type = std::string; };
197 template <> struct sam_tag_type<"BZ"_tag> { using type = std::string; };
198 template <> struct sam_tag_type<"CB"_tag> { using type = std::string; };
199 template <> struct sam_tag_type<"CC"_tag> { using type = std::string; };
200 template <> struct sam_tag_type<"CG"_tag> { using type = std::vector<int32_t>; };
201 template <> struct sam_tag_type<"CM"_tag> { using type = int32_t; };
202 template <> struct sam_tag_type<"CO"_tag> { using type = std::string; };
203 template <> struct sam_tag_type<"CP"_tag> { using type = int32_t; };
204 template <> struct sam_tag_type<"CQ"_tag> { using type = std::string; };
205 template <> struct sam_tag_type<"CR"_tag> { using type = std::string; };
206 template <> struct sam_tag_type<"CS"_tag> { using type = std::string; };
207 template <> struct sam_tag_type<"CT"_tag> { using type = std::string; };
208 template <> struct sam_tag_type<"CY"_tag> { using type = std::string; };
209 template <> struct sam_tag_type<"E2"_tag> { using type = std::string; };
210 template <> struct sam_tag_type<"FI"_tag> { using type = int32_t; };
211 template <> struct sam_tag_type<"FS"_tag> { using type = std::string; };
212 template <> struct sam_tag_type<"FZ"_tag> { using type = std::vector<uint16_t>; };
213 
214 // template <> struct sam_tag_type<"GC"_tag> {};
215 // template <> struct sam_tag_type<"GQ"_tag> {};
216 // template <> struct sam_tag_type<"GS"_tag> {};
217 
218 template <> struct sam_tag_type<"H0"_tag> { using type = int32_t; };
219 template <> struct sam_tag_type<"H1"_tag> { using type = int32_t; };
220 template <> struct sam_tag_type<"H2"_tag> { using type = int32_t; };
221 template <> struct sam_tag_type<"HI"_tag> { using type = int32_t; };
222 template <> struct sam_tag_type<"IH"_tag> { using type = int32_t; };
223 template <> struct sam_tag_type<"LB"_tag> { using type = std::string; };
224 template <> struct sam_tag_type<"MC"_tag> { using type = std::string; };
225 template <> struct sam_tag_type<"MD"_tag> { using type = std::string; };
226 
227 // template <> struct sam_tag_type<"MF"_tag> {};
228 
229 template <> struct sam_tag_type<"MI"_tag> { using type = std::string; };
230 template <> struct sam_tag_type<"MQ"_tag> { using type = int32_t; };
231 template <> struct sam_tag_type<"NH"_tag> { using type = int32_t; };
232 template <> struct sam_tag_type<"NM"_tag> { using type = int32_t; };
233 template <> struct sam_tag_type<"OC"_tag> { using type = std::string; };
234 template <> struct sam_tag_type<"OP"_tag> { using type = int32_t; };
235 template <> struct sam_tag_type<"OQ"_tag> { using type = std::string; };
236 template <> struct sam_tag_type<"OX"_tag> { using type = std::string; };
237 template <> struct sam_tag_type<"PG"_tag> { using type = std::string; };
238 template <> struct sam_tag_type<"PQ"_tag> { using type = int32_t; };
239 template <> struct sam_tag_type<"PT"_tag> { using type = std::string; };
240 template <> struct sam_tag_type<"PU"_tag> { using type = std::string; };
241 template <> struct sam_tag_type<"Q2"_tag> { using type = std::string; };
242 template <> struct sam_tag_type<"QT"_tag> { using type = std::string; };
243 template <> struct sam_tag_type<"QX"_tag> { using type = std::string; };
244 template <> struct sam_tag_type<"R2"_tag> { using type = std::string; };
245 template <> struct sam_tag_type<"RG"_tag> { using type = std::string; };
246 template <> struct sam_tag_type<"RT"_tag> { using type = std::string; };
247 template <> struct sam_tag_type<"RX"_tag> { using type = std::string; };
248 
249 // template <> struct sam_tag_type<"S2"_tag> {};
250 
251 template <> struct sam_tag_type<"SA"_tag> { using type = std::string; };
252 template <> struct sam_tag_type<"SM"_tag> { using type = int32_t; };
253 
254 // template <> struct sam_tag_type<"SQ"_tag> {};
255 
256 template <> struct sam_tag_type<"TC"_tag> { using type = int32_t; };
257 template <> struct sam_tag_type<"U2"_tag> { using type = std::string; };
258 template <> struct sam_tag_type<"UQ"_tag> { using type = int32_t; };
260 // clang-format on
261 
342 class sam_tag_dictionary : public std::map<uint16_t, detail::sam_tag_variant>
343 {
344 private:
347 
348 public:
351 
368  template <uint16_t tag>
369  requires (!std::same_as<sam_tag_type_t<tag>, variant_type>)
370  auto & get() &
371  {
372  if ((*this).count(tag) == 0)
373  (*this)[tag] = sam_tag_type_t<tag>{}; // set correct type if tag is not set yet on
374 
375  return std::get<sam_tag_type_t<tag>>((*this)[tag]);
376  }
377 
379  template <uint16_t tag>
380  requires (!std::same_as<sam_tag_type_t<tag>, variant_type>)
381  auto && get() &&
382  {
383  if ((*this).count(tag) == 0)
384  (*this)[tag] = sam_tag_type_t<tag>{}; // set correct type if tag is not set yet on
385 
386  return std::get<sam_tag_type_t<tag>>(std::move((*this)[tag]));
387  }
388 
391  template <uint16_t tag>
392  requires (!std::same_as<sam_tag_type_t<tag>, variant_type>)
393  auto const & get() const &
394  {
395  return std::get<sam_tag_type_t<tag>>((*this).at(tag));
396  }
397 
400  template <uint16_t tag>
401  requires (!std::same_as<sam_tag_type_t<tag>, variant_type>)
402  auto const && get() const &&
403  {
404  return std::get<sam_tag_type_t<tag>>(std::move((*this).at(tag)));
405  }
407 };
408 
409 } // namespace seqan3
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
requires(!std::same_as< sam_tag_type_t< tag >, variant_type >) auto const &&get() const &&
Uses std::map::at() for access and throws when the key is unknown.
Definition: sam_tag_dictionary.hpp:401
requires(!std::same_as< sam_tag_type_t< tag >, variant_type >) auto &get() &
Uses std::map::operator[] for access and default initializes new keys.
Definition: sam_tag_dictionary.hpp:369
requires(!std::same_as< sam_tag_type_t< tag >, variant_type >) auto &&get() &&
Uses std::map::operator[] for access and default initializes new keys.
Definition: sam_tag_dictionary.hpp:380
requires(!std::same_as< sam_tag_type_t< tag >, variant_type >) auto const &get() const &
Uses std::map::at() for access and throws when the key is unknown.
Definition: sam_tag_dictionary.hpp:392
Implements a small string that can be used for compile time computations.
Definition: small_string.hpp:44
The <concepts> header from C++20's standard library.
constexpr auto is_alnum
Checks whether c is an alphanumeric character.
Definition: predicate.hpp:197
constexpr auto is_alpha
Checks whether c is an alphabetical character.
Definition: predicate.hpp:214
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: configuration.hpp:415
Provides character predicates for tokenisation.
A constexpr string implementation to manipulate string literals at compile time.
The generic base class.
Definition: sam_tag_dictionary.hpp:181
typename sam_tag_type< tag_value >::type sam_tag_type_t
Shortcut helper for seqan3::sam_tag_type::type.
Definition: sam_tag_dictionary.hpp:189
Provides type traits for working with templates.