SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
format_sam_base.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <climits>
17 #include <ranges>
18 #include <string>
19 #include <vector>
20 
32 
33 namespace seqan3::detail
34 {
35 
44 class format_sam_base
45 {
46 protected:
50  format_sam_base() = default;
51  format_sam_base(format_sam_base const &) = default;
52  format_sam_base & operator=(format_sam_base const &) = default;
53  format_sam_base(format_sam_base &&) = default;
54  format_sam_base & operator=(format_sam_base &&) = default;
55  ~format_sam_base() = default;
56 
58 
60  static constexpr std::array format_version{'1', '.', '6'};
61 
63  std::array<char, 316> arithmetic_buffer{}; // Doubles can be up to 316 characters
64 
66  bool header_was_written{false};
67 
69  bool ref_info_present_in_header{false};
70 
71  template <typename ref_id_type, typename ref_id_tmp_type, typename header_type, typename ref_seqs_type>
72  void check_and_assign_ref_id(ref_id_type & ref_id,
73  ref_id_tmp_type & ref_id_tmp,
74  header_type & header,
75  ref_seqs_type & /*tag*/);
76 
77  template <typename align_type, typename ref_seqs_type>
78  void construct_alignment(align_type & align,
79  std::vector<cigar> & cigar_vector,
80  [[maybe_unused]] int32_t rid,
81  [[maybe_unused]] ref_seqs_type & ref_seqs,
82  [[maybe_unused]] int32_t ref_start,
83  size_t ref_length);
84 
85  void transfer_soft_clipping_to(std::vector<cigar> const & cigar_vector, int32_t & sc_begin, int32_t & sc_end) const;
86 
87  template <typename stream_view_t>
88  void read_byte_field(stream_view_t && stream_view, std::byte & byte_target);
89 
90  template <typename stream_view_type, std::ranges::forward_range target_range_type>
91  void read_forward_range_field(stream_view_type && stream_view, target_range_type & target);
92 
93  template <typename stream_view_t, arithmetic arithmetic_target_type>
94  void read_arithmetic_field(stream_view_t && stream_view, arithmetic_target_type & arithmetic_target);
95 
96  template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
97  void read_header(stream_view_type && stream_view,
98  sam_file_header<ref_ids_type> & hdr,
99  ref_seqs_type & /*ref_id_to_pos_map*/);
100 
101  template <typename stream_t, typename ref_ids_type>
102  void
103  write_header(stream_t & stream, sam_file_output_options const & options, sam_file_header<ref_ids_type> & header);
104 };
105 
116 template <typename ref_id_type, typename ref_id_tmp_type, typename header_type, typename ref_seqs_type>
117 inline void format_sam_base::check_and_assign_ref_id(ref_id_type & ref_id,
118  ref_id_tmp_type & ref_id_tmp,
119  header_type & header,
120  ref_seqs_type & /*tag*/)
121 {
122  if (!std::ranges::empty(ref_id_tmp)) // otherwise the std::optional will not be filled
123  {
124  auto search = header.ref_dict.find(ref_id_tmp);
125 
126  if (search == header.ref_dict.end())
127  {
128  if constexpr (detail::decays_to_ignore_v<ref_seqs_type>) // no reference information given
129  {
130  if (ref_info_present_in_header)
131  {
132  throw format_error{"Unknown reference id found in record which is not present in the header."};
133  }
134  else
135  {
136  header.ref_ids().push_back(ref_id_tmp);
137  auto pos = std::ranges::size(header.ref_ids()) - 1;
138  header.ref_dict[header.ref_ids()[pos]] = pos;
139  ref_id = pos;
140  }
141  }
142  else
143  {
144  throw format_error{"Unknown reference id found in record which is not present in the given ids."};
145  }
146  }
147  else
148  {
149  ref_id = search->second;
150  }
151  }
152 }
153 
159 inline void format_sam_base::transfer_soft_clipping_to(std::vector<cigar> const & cigar_vector,
160  int32_t & sc_begin,
161  int32_t & sc_end) const
162 {
163  // Checks if the given index in the cigar vector is a soft clip.
164  auto soft_clipping_at = [&](size_t const index)
165  {
166  return cigar_vector[index] == 'S'_cigar_operation;
167  };
168  // Checks if the given index in the cigar vector is a hard clip.
169  auto hard_clipping_at = [&](size_t const index)
170  {
171  return cigar_vector[index] == 'H'_cigar_operation;
172  };
173  // Checks if the given cigar vector as at least min_size many elements.
174  auto vector_size_at_least = [&](size_t const min_size)
175  {
176  return cigar_vector.size() >= min_size;
177  };
178  // Returns the cigar count of the ith cigar element in the given cigar vector.
179  auto cigar_count_at = [&](size_t const index)
180  {
181  return get<0>(cigar_vector[index]);
182  };
183 
184  // check for soft clipping at the first two positions
185  if (vector_size_at_least(1) && soft_clipping_at(0))
186  sc_begin = cigar_count_at(0);
187  else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
188  sc_begin = cigar_count_at(1);
189 
190  // Check for soft clipping at the last two positions. But only if the vector size has at least 2, respectively
191  // 3 elements. Accordingly, if the following arithmetics overflow they are protected by the corresponding
192  // if expressions below.
193  auto last_index = cigar_vector.size() - 1;
194  auto second_last_index = last_index - 1;
195 
196  if (vector_size_at_least(2) && soft_clipping_at(last_index))
197  sc_end = cigar_count_at(last_index);
198  else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
199  sc_end = cigar_count_at(second_last_index);
200 }
201 
212 template <typename align_type, typename ref_seqs_type>
213 inline void format_sam_base::construct_alignment(align_type & align,
214  std::vector<cigar> & cigar_vector,
215  [[maybe_unused]] int32_t rid,
216  [[maybe_unused]] ref_seqs_type & ref_seqs,
217  [[maybe_unused]] int32_t ref_start,
218  size_t ref_length)
219 {
220  if (rid > -1 && ref_start > -1 && // read is mapped
221  !cigar_vector.empty() && // alignment field was not empty
222  !std::ranges::empty(get<1>(align))) // seq field was not empty
223  {
224  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
225  {
226  assert(static_cast<size_t>(ref_start + ref_length) <= std::ranges::size(ref_seqs[rid]));
227  // copy over unaligned reference sequence part
228  assign_unaligned(get<0>(align), ref_seqs[rid] | views::slice(ref_start, ref_start + ref_length));
229  }
230  else
231  {
232  using unaligned_t = std::remove_cvref_t<detail::unaligned_seq_t<decltype(get<0>(align))>>;
233  auto dummy_seq = views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, ref_length)
234  | std::views::transform(detail::access_restrictor_fn{});
235  static_assert(std::same_as<unaligned_t, decltype(dummy_seq)>,
236  "No reference information was given so the type of the first alignment tuple position"
237  "must have an unaligned sequence type of a dummy sequence ("
238  "views::repeat_n(dna5{}, size_t{}) | "
239  "std::views::transform(detail::access_restrictor_fn{}))");
240 
241  assign_unaligned(get<0>(align), dummy_seq); // assign dummy sequence
242  }
243 
244  // insert gaps according to the cigar information
245  detail::alignment_from_cigar(align, cigar_vector);
246  }
247  else // not enough information for an alignment, assign an empty view/dummy_sequence
248  {
249  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference info given
250  {
251  assert(std::ranges::size(ref_seqs) > 0); // we assume that the given ref info is not empty
252  assign_unaligned(get<0>(align), ref_seqs[0] | views::slice(0, 0));
253  }
254  else
255  {
256  using unaligned_t = std::remove_cvref_t<detail::unaligned_seq_t<decltype(get<0>(align))>>;
257  assign_unaligned(get<0>(align),
258  views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, 0)
259  | std::views::transform(detail::access_restrictor_fn{}));
260  }
261  }
262 }
263 
273 template <typename stream_view_t>
274 inline void format_sam_base::read_byte_field(stream_view_t && stream_view, std::byte & byte_target)
275 {
276  // unfortunately std::from_chars only accepts char const * so we need a buffer.
277  auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
278  (void)ignore;
279 
280  uint8_t byte{};
281  // std::from_chars cannot directly parse into a std::byte
282  std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, byte, 16);
283 
284  if (res.ec == std::errc::invalid_argument || res.ptr != end)
285  throw format_error{std::string("[CORRUPTED SAM FILE] The string '")
286  + std::string(arithmetic_buffer.begin(), end) + "' could not be cast into type uint8_t."};
287 
288  if (res.ec == std::errc::result_out_of_range)
289  throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end)
290  + "' into type uint8_t would cause an overflow."};
291  byte_target = std::byte{byte};
292 }
293 
301 template <typename stream_view_type, std::ranges::forward_range target_range_type>
302 inline void format_sam_base::read_forward_range_field(stream_view_type && stream_view, target_range_type & target)
303 {
304  using target_range_value_t = std::ranges::range_value_t<target_range_type>;
305  using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
306  using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
307 
308  // Note that we need to cache the begin iterator since the stream_view is an input range that may be consuming
309  // and in that case might read `past-the-end` on a second call of std::ranges::begin.
310  if (auto it = std::ranges::begin(stream_view); it != std::ranges::end(stream_view))
311  {
312  // Write to target if field does not represent an empty string, denoted as single '*' character.
313  if (char c = *it; !(++it == std::ranges::end(stream_view) && c == '*'))
314  {
315  target.push_back(seqan3::assign_char_to(c, target_range_value_t{}));
316  std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
317  | views::char_to<target_range_value_t>,
318  std::back_inserter(target));
319  }
320  }
321 }
322 
333 template <typename stream_view_t, arithmetic arithmetic_target_type>
334 inline void format_sam_base::read_arithmetic_field(stream_view_t && stream_view,
335  arithmetic_target_type & arithmetic_target)
336 {
337  // unfortunately std::from_chars only accepts char const * so we need a buffer.
338  auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
339  (void)ignore;
340  std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, arithmetic_target);
341 
342  if (res.ec == std::errc::invalid_argument || res.ptr != end)
343  throw format_error{std::string("[CORRUPTED SAM FILE] The string '")
344  + std::string(arithmetic_buffer.begin(), end) + "' could not be cast into type "
345  + detail::type_name_as_string<arithmetic_target_type>};
346 
347  if (res.ec == std::errc::result_out_of_range)
348  throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end)
349  + "' into type " + detail::type_name_as_string<arithmetic_target_type>
350  + " would cause an overflow."};
351 }
352 
369 template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
370 inline void format_sam_base::read_header(stream_view_type && stream_view,
371  sam_file_header<ref_ids_type> & hdr,
372  ref_seqs_type & /*ref_id_to_pos_map*/)
373 {
374  auto it = std::ranges::begin(stream_view);
375  auto end = std::ranges::end(stream_view);
376  std::vector<char> string_buffer{};
377 
378  auto make_tag = [](uint8_t char1, uint8_t char2) constexpr
379  {
380  return static_cast<uint16_t>(char1) | (static_cast<uint16_t>(char2) << CHAR_BIT);
381  };
382 
383  std::array<char, 2> raw_tag{};
384 
385  auto parse_and_make_tag = [&]()
386  {
387  raw_tag[0] = *it;
388  ++it;
389  raw_tag[1] = *it;
390  ++it;
391  return make_tag(raw_tag[0], raw_tag[1]);
392  };
393 
394  auto take_until_predicate = [&it, &string_buffer](auto const & predicate)
395  {
396  string_buffer.clear();
397  while (!predicate(*it))
398  {
399  string_buffer.push_back(*it);
400  ++it;
401  }
402  };
403 
404  auto skip_until_predicate = [&it](auto const & predicate)
405  {
406  while (!predicate(*it))
407  ++it;
408  };
409 
410  auto copy_next_tag_value_into_buffer = [&]()
411  {
412  skip_until_predicate(is_char<':'>);
413  ++it; // skip :
414  take_until_predicate(is_char<'\t'> || is_char<'\n'>);
415  };
416 
417  // Some tags are not parsed individually. Instead, these are simply copied into a std::string.
418  // Multiple tags must be separated by a `\t`, hence we prepend a tab to the string, except the first time.
419  // Alternatively, we could always append a `\t`, but this would have the side effect that we might need to trim a
420  // trailing tab after parsing all tags via `pop_back()`.
421  // Unfortunately, we do not know when we are parsing the last tag (and in this case just not append a tab),
422  // because even though we can check if the line ends in a `\n`, it is not guaranteed that the last tag of the
423  // line is passed to this lambda. For example, the line might end with a tag that is properly parsed, such as `ID`.
424  auto parse_and_append_unhandled_tag_to_string = [&](std::string & value, std::array<char, 2> raw_tag)
425  {
426  take_until_predicate(is_char<'\t'> || is_char<'\n'>);
427  if (!value.empty())
428  value.push_back('\t');
429  value.push_back(raw_tag[0]);
430  value.push_back(raw_tag[1]);
431  read_forward_range_field(string_buffer, value);
432  };
433 
434  auto print_cerr_of_unspported_tag = [&it](char const * const header_tag, std::array<char, 2> raw_tag)
435  {
436  std::cerr << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0] << raw_tag[1] << '\n';
437  };
438 
439  while (it != end && is_char<'@'>(*it))
440  {
441  ++it; // skip @
442 
443  switch (parse_and_make_tag())
444  {
445  case make_tag('H', 'D'): // HD (header) tag
446  {
447  // All tags can appear in any order, VN is the only required tag
448  while (is_char<'\t'>(*it))
449  {
450  ++it; // skip tab
451  std::string * header_entry{nullptr};
452 
453  switch (parse_and_make_tag())
454  {
455  case make_tag('V', 'N'): // parse required VN (version) tag
456  {
457  header_entry = std::addressof(hdr.format_version);
458  break;
459  }
460  case make_tag('S', 'O'): // SO (sorting) tag
461  {
462  header_entry = std::addressof(hdr.sorting);
463  break;
464  }
465  case make_tag('S', 'S'): // SS (sub-order) tag
466  {
467  header_entry = std::addressof(hdr.subsorting);
468  break;
469  }
470  case make_tag('G', 'O'): // GO (grouping) tag
471  {
472  header_entry = std::addressof(hdr.grouping);
473  break;
474  }
475  default: // unsupported header tag
476  {
477  print_cerr_of_unspported_tag("HD", raw_tag);
478  }
479  }
480 
481  if (header_entry != nullptr)
482  {
483  copy_next_tag_value_into_buffer();
484  read_forward_range_field(string_buffer, *header_entry);
485  }
486  else
487  skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
488  }
489  ++it; // skip newline
490 
491  if (hdr.format_version.empty())
492  throw format_error{std::string{"The required VN tag in @HD is missing."}};
493 
494  break;
495  }
496 
497  case make_tag('S', 'Q'): // SQ (sequence dictionary) tag
498  {
499  ref_info_present_in_header = true;
500  std::ranges::range_value_t<decltype(hdr.ref_ids())> id;
501  std::optional<int32_t> sequence_length{};
503 
504  // All tags can appear in any order, SN and LN are required tags
505  while (is_char<'\t'>(*it))
506  {
507  ++it; // skip tab
508 
509  switch (parse_and_make_tag())
510  {
511  case make_tag('S', 'N'): // parse required SN (sequence name) tag
512  {
513  copy_next_tag_value_into_buffer();
514  read_forward_range_field(string_buffer, id);
515  break;
516  }
517  case make_tag('L', 'N'): // parse required LN (length) tag
518  {
519  int32_t sequence_length_tmp{};
520  copy_next_tag_value_into_buffer();
521  read_arithmetic_field(string_buffer, sequence_length_tmp);
522  sequence_length = sequence_length_tmp;
523  break;
524  }
525  default: // Any other tag
526  {
527  parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
528  }
529  }
530  }
531  ++it; // skip newline
532 
533  if (id.empty())
534  throw format_error{std::string{"The required SN tag in @SQ is missing."}};
535  if (!sequence_length.has_value())
536  throw format_error{std::string{"The required LN tag in @SQ is missing."}};
537  if (sequence_length.value() <= 0)
538  throw format_error{std::string{"The value of LN in @SQ must be positive."}};
539 
540  get<0>(info) = sequence_length.value();
541  // If reference information was given, the ids exist and we can fill ref_dict directly.
542  // If not, we need to update the ids first and fill the reference dictionary afterwards.
543  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference information given
544  {
545  auto id_it = hdr.ref_dict.find(id);
546 
547  if (id_it == hdr.ref_dict.end())
548  throw format_error{detail::to_string("Unknown reference name '",
549  id,
550  "' found in SAM header ",
551  "(header.ref_ids(): ",
552  hdr.ref_ids(),
553  ").")};
554 
555  auto & given_ref_info = hdr.ref_id_info[id_it->second];
556 
557  if (std::get<0>(given_ref_info) != std::get<0>(info))
558  throw format_error{"Provided and header-based reference length differ."};
559 
560  hdr.ref_id_info[id_it->second] = std::move(info);
561  }
562  else
563  {
564  static_assert(!detail::is_type_specialisation_of_v<decltype(hdr.ref_ids()), std::deque>,
565  "The range over reference ids must be of type std::deque such that pointers are not "
566  "invalidated.");
567 
568  hdr.ref_ids().push_back(id);
569  hdr.ref_id_info.push_back(info);
570  hdr.ref_dict[(hdr.ref_ids())[(hdr.ref_ids()).size() - 1]] = (hdr.ref_ids()).size() - 1;
571  }
572  break;
573  }
574 
575  case make_tag('R', 'G'): // RG (read group) tag
576  {
578 
579  // All tags can appear in any order, SN and LN are required tags
580  while (is_char<'\t'>(*it))
581  {
582  ++it; // skip tab
583 
584  switch (parse_and_make_tag())
585  {
586  case make_tag('I', 'D'): // parse required ID tag
587  {
588  copy_next_tag_value_into_buffer();
589  read_forward_range_field(string_buffer, get<0>(tmp));
590  break;
591  }
592  default: // Any other tag
593  {
594  parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
595  }
596  }
597  }
598  ++it; // skip newline
599 
600  if (get<0>(tmp).empty())
601  throw format_error{std::string{"The required ID tag in @RG is missing."}};
602 
603  hdr.read_groups.emplace_back(std::move(tmp));
604  break;
605  }
606 
607  case make_tag('P', 'G'): // PG (program) tag
608  {
609  typename sam_file_header<ref_ids_type>::program_info_t tmp{};
610 
611  // All tags can appear in any order, ID is the only required tag
612  while (is_char<'\t'>(*it))
613  {
614  ++it; // skip tab
615  std::string * program_info_entry{nullptr};
616 
617  switch (parse_and_make_tag())
618  {
619  case make_tag('I', 'D'): // read required ID tag
620  {
621  program_info_entry = std::addressof(tmp.id);
622  break;
623  }
624  case make_tag('P', 'N'): // PN (program name) tag
625  {
626  program_info_entry = std::addressof(tmp.name);
627  break;
628  }
629  case make_tag('P', 'P'): // PP (previous program) tag
630  {
631  program_info_entry = std::addressof(tmp.previous);
632  break;
633  }
634  case make_tag('C', 'L'): // CL (command line) tag
635  {
636  program_info_entry = std::addressof(tmp.command_line_call);
637  break;
638  }
639  case make_tag('D', 'S'): // DS (description) tag
640  {
641  program_info_entry = std::addressof(tmp.description);
642  break;
643  }
644  case make_tag('V', 'N'): // VN (version) tag
645  {
646  program_info_entry = std::addressof(tmp.version);
647  break;
648  }
649  default: // unsupported header tag
650  {
651  print_cerr_of_unspported_tag("PG", raw_tag);
652  }
653  }
654 
655  if (program_info_entry != nullptr)
656  {
657  copy_next_tag_value_into_buffer();
658  read_forward_range_field(string_buffer, *program_info_entry);
659  }
660  else
661  skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
662  }
663  ++it; // skip newline
664 
665  if (tmp.id.empty())
666  throw format_error{std::string{"The required ID tag in @PG is missing."}};
667 
668  hdr.program_infos.emplace_back(std::move(tmp));
669  break;
670  }
671 
672  case make_tag('C', 'O'): // CO (comment) tag
673  {
674  ++it; // skip tab
675  std::string tmp;
676  take_until_predicate(is_char<'\n'>);
677  read_forward_range_field(string_buffer, tmp);
678  ++it; // skip newline
679  hdr.comments.emplace_back(std::move(tmp));
680  break;
681  }
682 
683  default:
684  throw format_error{std::string{"Illegal SAM header tag starting with:"} + *it};
685  }
686  }
687 }
688 
705 template <typename stream_t, typename ref_ids_type>
706 inline void format_sam_base::write_header(stream_t & stream,
707  sam_file_output_options const & options,
708  sam_file_header<ref_ids_type> & header)
709 {
710  // -----------------------------------------------------------------
711  // Check Header
712  // -----------------------------------------------------------------
713 
714  // (@HD) Check header line
715  // The format version string will be taken from the local member variable
716  if (!header.sorting.empty()
717  && !(header.sorting == "unknown" || header.sorting == "unsorted" || header.sorting == "queryname"
718  || header.sorting == "coordinate"))
719  throw format_error{"SAM format error: The header.sorting member must be "
720  "one of [unknown, unsorted, queryname, coordinate]."};
721 
722  if (!header.grouping.empty()
723  && !(header.grouping == "none" || header.grouping == "query" || header.grouping == "reference"))
724  throw format_error{"SAM format error: The header.grouping member must be "
725  "one of [none, query, reference]."};
726 
727  // (@SQ) Check Reference Sequence Dictionary lines
728 
729  // TODO
730 
731  // - sorting order be one of ...
732  // - grouping can be one of ...
733  // - reference names must be unique
734  // - ids of read groups must be unique
735  // - program ids need to be unique
736  // many more small semantic things, like fits REGEX
737 
738  // -----------------------------------------------------------------
739  // Write Header
740  // -----------------------------------------------------------------
741  std::ostreambuf_iterator stream_it{stream};
742 
743  // (@HD) Write header line [required].
744  stream << "@HD\tVN:";
745  std::ranges::copy(format_version, stream_it);
746 
747  if (!header.sorting.empty())
748  stream << "\tSO:" << header.sorting;
749 
750  if (!header.subsorting.empty())
751  stream << "\tSS:" << header.subsorting;
752 
753  if (!header.grouping.empty())
754  stream << "\tGO:" << header.grouping;
755 
756  detail::write_eol(stream_it, options.add_carriage_return);
757 
758  // (@SQ) Write Reference Sequence Dictionary lines [required].
759  for (auto const & [ref_name, ref_info] : views::zip(header.ref_ids(), header.ref_id_info))
760  {
761  stream << "@SQ\tSN:";
762 
763  std::ranges::copy(ref_name, stream_it);
764 
765  stream << "\tLN:" << get<0>(ref_info);
766 
767  if (!get<1>(ref_info).empty())
768  stream << "\t" << get<1>(ref_info);
769 
770  detail::write_eol(stream_it, options.add_carriage_return);
771  }
772 
773  // Write read group (@RG) lines if specified.
774  for (auto const & read_group : header.read_groups)
775  {
776  stream << "@RG"
777  << "\tID:" << get<0>(read_group);
778 
779  if (!get<1>(read_group).empty())
780  stream << "\t" << get<1>(read_group);
781 
782  detail::write_eol(stream_it, options.add_carriage_return);
783  }
784 
785  // Write program (@PG) lines if specified.
786  for (auto const & program : header.program_infos)
787  {
788  stream << "@PG"
789  << "\tID:" << program.id;
790 
791  if (!program.name.empty())
792  stream << "\tPN:" << program.name;
793 
794  if (!program.command_line_call.empty())
795  stream << "\tCL:" << program.command_line_call;
796 
797  if (!program.previous.empty())
798  stream << "\tPP:" << program.previous;
799 
800  if (!program.description.empty())
801  stream << "\tDS:" << program.description;
802 
803  if (!program.version.empty())
804  stream << "\tVN:" << program.version;
805 
806  detail::write_eol(stream_it, options.add_carriage_return);
807  }
808 
809  // Write comment (@CO) lines if specified.
810  for (auto const & comment : header.comments)
811  {
812  stream << "@CO\t" << comment;
813  detail::write_eol(stream_it, options.add_carriage_return);
814  }
815 }
816 
817 } // namespace seqan3::detail
T addressof(T... args)
T back_inserter(T... args)
Provides seqan3::views::char_to.
Provides various utility functions.
T empty(T... args)
T end(T... args)
T find(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: alphabet/concept.hpp:524
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
requires std::ranges::forward_range< std::ranges::range_reference_t< queries_t > > &&std::same_as< range_innermost_value_t< queries_t >, typename index_t::alphabet_type > auto search(queries_t &&queries, index_t const &index, configuration_t const &cfg=search_cfg::default_configuration)
Search a query or a range of queries in an index.
Definition: search.hpp:103
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: type_list/traits.hpp:469
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
constexpr auto zip
A view adaptor that takes several views and returns tuple-like values from every i-th element of each...
Definition: zip.hpp:573
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::sam_file_header class.
Provides various utility functions.
Auxiliary functions for the alignment IO.
T push_back(T... args)
Provides seqan3::debug_stream and related types.
The <ranges> header from C++20's standard library.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_output_format and auxiliary classes.
T size(T... args)
Provides seqan3::views::slice.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.