SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
aa20.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <vector>
16 
20 
21 namespace seqan3
22 {
23 
63 class aa20 : public aminoacid_base<aa20, 20>
64 {
65 private:
68 
70  friend base_t;
72  friend base_t::base_t;
74 
75 public:
79  constexpr aa20() noexcept = default;
80  constexpr aa20(aa20 const &) noexcept = default;
81  constexpr aa20(aa20 &&) noexcept = default;
82  constexpr aa20 & operator=(aa20 const &) noexcept = default;
83  constexpr aa20 & operator=(aa20 &&) noexcept = default;
84  ~aa20() noexcept = default;
85 
86  using base_t::base_t;
88 
89 private:
91  static constexpr char_type rank_to_char_table[alphabet_size]{'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
92  'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'};
93 
95  static constexpr std::array<rank_type, 256> char_to_rank_table{[]() constexpr {std::array<rank_type, 256> ret{};
96 
97  // initialize with UNKNOWN (std::array::fill unfortunately not constexpr)
98  for (auto & c : ret)
99  c = 15; // value of 'S', because that appears most frequently
100 
101  // reverse mapping for characters and their lowercase
102  for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
103  {
104  ret[static_cast<rank_type>(rank_to_char_table[rnk])] = rnk;
105  ret[static_cast<rank_type>(to_lower(rank_to_char_table[rnk]))] = rnk;
106  }
107 
108  ret['B'] = ret['D'];
109  ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
110  ret['J'] = ret['L'];
111  ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
112  ret['O'] = ret['L'];
113  ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
114  ret['U'] = ret['C'];
115  ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
116  ret['X'] = ret['S'];
117  ret['x'] = ret['S']; // Convert unknown amino acids to serine.
118  ret['Z'] = ret['E'];
119  ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
120  ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
121  return ret;
122 }()
123 }; // namespace seqan3
124 
126 static constexpr char_type rank_to_char(rank_type const rank)
127 {
128  return rank_to_char_table[rank];
129 }
130 
132 static constexpr rank_type char_to_rank(char_type const chr)
133 {
134  using index_t = std::make_unsigned_t<char_type>;
135  return char_to_rank_table[static_cast<index_t>(chr)];
136 }
137 }
138 ;
139 
140 // ------------------------------------------------------------------
141 // containers
142 // ------------------------------------------------------------------
143 
150 
151 // ------------------------------------------------------------------
152 // literals
153 // ------------------------------------------------------------------
154 inline namespace literals
155 {
156 
170 constexpr aa20 operator""_aa20(char const c) noexcept
171 {
172  return aa20{}.assign_char(c);
173 }
174 
186 inline aa20_vector operator""_aa20(char const * const s, size_t const n)
187 {
188  aa20_vector r;
189  r.resize(n);
190 
191  for (size_t i = 0; i < n; ++i)
192  r[i].assign_char(s[i]);
193 
194  return r;
195 }
197 
198 } // namespace literals
199 
200 } // namespace seqan3
Provides seqan3::aminoacid_alphabet.
Provides seqan3::aminoacid_base.
The canonical amino acid alphabet.
Definition: aa20.hpp:64
constexpr aa20() noexcept=default
Defaulted.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:57
constexpr derived_type & assign_char(char_type const chr) noexcept requires(!std::same_as< char_t, void >)
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition: aminoacid_base.hpp:32
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: transform.hpp:83
T resize(T... args)
Provides utilities for modifying characters.