SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
io/structure_file/output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <filesystem>
17 #include <fstream>
18 #include <optional>
19 #include <ranges>
20 #include <string>
21 #include <type_traits>
22 #include <variant>
23 #include <vector>
24 
27 #include <seqan3/io/detail/record.hpp>
29 #include <seqan3/io/exception.hpp>
30 #include <seqan3/io/record.hpp>
40 
41 namespace seqan3
42 {
43 
44 // ----------------------------------------------------------------------------
45 // structure_file_output
46 // ----------------------------------------------------------------------------
47 
60 template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::structure>,
61  detail::type_list_of_structure_file_output_formats valid_formats_ = type_list<format_vienna>>
63 {
64 public:
70  using selected_field_ids = selected_field_ids_;
72  using valid_formats = valid_formats_;
74  using stream_char_type = char;
76 
79  field::id,
80  field::bpp,
88 
89  static_assert(
90  []() constexpr {
91  for (field f : selected_field_ids::as_array)
92  if (!field_ids::contains(f))
93  return false;
94  return true;
95  }(),
96  "You selected a field that is not valid for structure files, please refer to the documentation "
97  "of structure_file_output::field_ids for the accepted values.");
98 
99  static_assert(
100  []() constexpr {
103  }(),
104  "You may not select field::structured_seq and either of field::seq and field::structure "
105  "at the same time.");
106 
113  using value_type = void;
115  using reference = void;
117  using const_reference = void;
119  using size_type = void;
123  using iterator = detail::out_file_iterator<structure_file_output>;
125  using const_iterator = void;
127  using sentinel = std::default_sentinel_t;
129 
145 
162  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
163  primary_stream{new std::ofstream{}, stream_deleter_default}
164  {
165  primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
166  static_cast<std::basic_ofstream<char> *>(primary_stream.get())
167  ->open(filename, std::ios_base::out | std::ios::binary);
168 
169  if (!primary_stream->good())
170  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
171 
172  // possibly add intermediate compression stream
173  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
174 
175  // initialise format handler or throw if format is not found
176  detail::set_format(format, filename);
177  }
178 
195  template <output_stream stream_t, structure_file_output_format file_format>
196  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
197  structure_file_output(stream_t & stream,
198  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
199  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
200  primary_stream{&stream, stream_deleter_noop},
201  secondary_stream{&stream, stream_deleter_noop},
202  format{detail::structure_file_output_format_exposer<file_format>{}}
203  {
204  static_assert(list_traits::contains<file_format, valid_formats>,
205  "You selected a format that is not in the valid_formats of this file.");
206  }
207 
209  template <output_stream stream_t, structure_file_output_format file_format>
210  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
211  structure_file_output(stream_t && stream,
212  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
213  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
214  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
215  secondary_stream{&*primary_stream, stream_deleter_noop},
216  format{detail::structure_file_output_format_exposer<file_format>{}}
217  {
218  static_assert(list_traits::contains<file_format, valid_formats>,
219  "You selected a format that is not in the valid_formats of this file.");
220  }
222 
244  iterator begin() noexcept
245  {
246  return {*this};
247  }
248 
263  sentinel end() noexcept
264  {
265  return {};
266  }
267 
286  template <typename record_t>
287  void push_back(record_t && r)
288  requires detail::record_like<record_t>
289  {
290  write_record(detail::get_or_ignore<field::seq>(r),
291  detail::get_or_ignore<field::id>(r),
292  detail::get_or_ignore<field::bpp>(r),
293  detail::get_or_ignore<field::structure>(r),
294  detail::get_or_ignore<field::structured_seq>(r),
295  detail::get_or_ignore<field::energy>(r),
296  detail::get_or_ignore<field::react>(r),
297  detail::get_or_ignore<field::react_err>(r),
298  detail::get_or_ignore<field::comment>(r),
299  detail::get_or_ignore<field::offset>(r));
300  }
301 
323  template <typename tuple_t>
324  void push_back(tuple_t && t)
325  requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
326  {
327  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
328  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
329  detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
330  detail::get_or_ignore<selected_field_ids::index_of(field::bpp)>(t),
331  detail::get_or_ignore<selected_field_ids::index_of(field::structure)>(t),
332  detail::get_or_ignore<selected_field_ids::index_of(field::structured_seq)>(t),
333  detail::get_or_ignore<selected_field_ids::index_of(field::energy)>(t),
334  detail::get_or_ignore<selected_field_ids::index_of(field::react)>(t),
335  detail::get_or_ignore<selected_field_ids::index_of(field::react_err)>(t),
336  detail::get_or_ignore<selected_field_ids::index_of(field::comment)>(t),
337  detail::get_or_ignore<selected_field_ids::index_of(field::offset)>(t));
338  }
339 
363  template <typename arg_t, typename... arg_types>
364  void emplace_back(arg_t && arg, arg_types &&... args)
365  {
366  push_back(std::tie(arg, args...));
367  }
368 
390  template <std::ranges::input_range rng_t>
391  structure_file_output & operator=(rng_t && range)
393  {
394  for (auto && record : range)
395  push_back(std::forward<decltype(record)>(record));
396  return *this;
397  }
398 
426  template <std::ranges::input_range rng_t>
427  friend structure_file_output & operator|(rng_t && range, structure_file_output & f)
429  {
430  f = range;
431  return f;
432  }
433 
435  template <std::ranges::input_range rng_t>
436  friend structure_file_output operator|(rng_t && range, structure_file_output && f)
438  {
439  f = range;
440  return std::move(f);
441  }
443 
445  structure_file_output_options options;
446 
451  {
452  return *secondary_stream;
453  }
455 
456 protected:
459  std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
460 
468  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *)
469  {}
471  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr)
472  {
473  delete ptr;
474  }
475 
477  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
479  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
480 
482  using format_type =
483  typename detail::variant_from_tags<valid_formats, detail::structure_file_output_format_exposer>::type;
485  format_type format;
487 
489  template <typename seq_type,
490  typename id_type,
491  typename bpp_type,
492  typename structure_type,
493  typename structured_seq_type,
494  typename energy_type,
495  typename react_type,
496  typename comment_type,
497  typename offset_type>
498  void write_record(seq_type && seq,
499  id_type && id,
500  bpp_type && bpp,
501  structure_type && structure,
502  structured_seq_type && structured_seq,
503  energy_type && energy,
504  react_type && react,
505  react_type && react_error,
506  comment_type && comment,
507  offset_type && offset)
508  {
509  static_assert(detail::decays_to_ignore_v<structured_seq_type>
510  || (detail::decays_to_ignore_v<seq_type> && detail::decays_to_ignore_v<structure_type>),
511  "You may not select field::structured_seq and either of field::seq and field::structure "
512  "at the same time.");
513 
514  assert(!format.valueless_by_exception());
515  std::visit(
516  [&](auto & f)
517  {
518  if constexpr (!detail::decays_to_ignore_v<structured_seq_type>)
519  {
520  f.write_structure_record(*secondary_stream,
521  options,
522  structured_seq | views::elements<0>,
523  id,
524  bpp,
525  structured_seq | views::elements<1>,
526  energy,
527  react,
528  react_error,
529  comment,
530  offset);
531  }
532  else
533  {
534  f.write_structure_record(*secondary_stream,
535  options,
536  seq,
537  id,
538  bpp,
539  structure,
540  energy,
541  react,
542  react_error,
543  comment,
544  offset);
545  }
546  },
547  format);
548  }
549 
551  friend iterator;
552 };
553 
560 template <output_stream stream_t,
561  structure_file_output_format file_format,
562  detail::fields_specialisation selected_field_ids>
563 structure_file_output(stream_t &&, file_format const &, selected_field_ids const &)
565 
567 template <output_stream stream_t,
568  structure_file_output_format file_format,
569  detail::fields_specialisation selected_field_ids>
570 structure_file_output(stream_t &, file_format const &, selected_field_ids const &)
573 
574 } // namespace seqan3
A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: io/structure_file/output.hpp:63
std::default_sentinel_t sentinel
The type returned by end().
Definition: io/structure_file/output.hpp:127
void push_back(record_t &&r) requires detail::record_like<record_t>
Write a seqan3::record to the file.
Definition: io/structure_file/output.hpp:287
structure_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format >>
Deduction of the selected fields, the file format and the stream type.
structure_file_output(stream_t &, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void const_reference
The const reference type (void).
Definition: io/structure_file/output.hpp:117
detail::out_file_iterator< structure_file_output > iterator
The iterator type of this view (an output iterator).
Definition: io/structure_file/output.hpp:123
char stream_char_type
Character type of the stream(s).
Definition: io/structure_file/output.hpp:74
structure_file_output & operator=(structure_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: io/structure_file/output.hpp:70
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, char > structure_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: io/structure_file/output.hpp:197
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, char > structure_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: io/structure_file/output.hpp:211
structure_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
structure_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: io/structure_file/output.hpp:161
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: io/structure_file/output.hpp:364
void size_type
The size type (void).
Definition: io/structure_file/output.hpp:119
void reference
The reference type (void).
Definition: io/structure_file/output.hpp:115
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: io/structure_file/output.hpp:244
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: io/structure_file/output.hpp:263
structure_file_output & operator=(structure_file_output &&)=default
Move assignment is defaulted.
structure_file_output(structure_file_output &&)=default
Move construction is defaulted.
void value_type
The value type (void).
Definition: io/structure_file/output.hpp:113
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: io/structure_file/output.hpp:72
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: io/structure_file/output.hpp:125
~structure_file_output()=default
Destructor is defaulted.
structure_file_output(structure_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
T data(T... args)
Provides seqan3::views::elements.
Provides the seqan3::format_vienna.
T forward(T... args)
T get(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
requires std::common_with< typename std::remove_reference_t< validator1_type >::option_value_type, typename std::remove_reference_t< validator2_type >::option_value_type > auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1124
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ energy
Energy of a folded sequence, represented by one float number.
@ comment
Comment field of arbitrary content, usually a string.
@ structure
Fixed interactions, usually a string of structure alphabet characters.
@ bpp
Base pair probability matrix of interactions, usually a matrix of float numbers.
@ react
Reactivity values of the sequence characters given in a vector of float numbers.
@ react_err
Reactivity error values given in a vector corresponding to seqan3::field::react.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ structured_seq
Sequence and fixed interactions combined in one range.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
requires constexpr seqan3::detail::template_specialisation_of< list_t, seqan3::type_list > bool contains
Whether a type occurs in a type list or not.
Definition: type_list/traits.hpp:252
The generic concept for structure file out formats.
Whether a type behaves like a tuple.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides the seqan3::detail::out_file_iterator class template.
The <ranges> header from C++20's standard library.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
The class template that file records are based on; behaves like a std::tuple.
Definition: record.hpp:192
Provides seqan3::structure_file_output_format and auxiliary classes.
Provides seqan3::structure_file_output_options.
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.