SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
alignment_configurator.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <functional>
16 #include <tuple>
17 #include <utility>
18 #include <vector>
19 
64 
65 namespace seqan3::detail
66 {
67 
77 template <typename range_type, typename alignment_config_type>
78 struct alignment_contract
79 {
80 private:
85  using unref_range_type = std::remove_reference_t<range_type>;
87  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
89  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
91 
92 public:
94  static constexpr bool expects_tuple_like_value_type()
95  {
97  && std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
98  }
99 
101  static constexpr bool expects_valid_scoring_scheme()
102  {
103  if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
104  {
105  using scoring_type = std::remove_reference_t<
106  decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
107  return static_cast<bool>(scoring_scheme_for<scoring_type,
108  std::ranges::range_value_t<first_seq_t>,
109  std::ranges::range_value_t<second_seq_t>>);
110  }
111  else
112  {
113  return false;
114  }
115  }
116 
118  static constexpr bool expects_alignment_configuration()
119  {
120  bool const is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
121  bool const is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
122 
123  return (is_global || is_local);
124  }
125 };
126 
131 struct alignment_configurator
132 {
133 private:
137  template <typename traits_t>
138  struct select_matrix_policy
139  {
140  private:
142  static constexpr bool only_coordinates =
143  !(traits_t::compute_begin_positions || traits_t::compute_sequence_alignment);
144 
146  using score_matrix_t =
147  std::conditional_t<traits_t::is_banded,
148  alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
149  alignment_score_matrix_one_column<typename traits_t::score_type>>;
151  using trace_matrix_t =
152  std::conditional_t<traits_t::is_banded,
153  alignment_trace_matrix_full_banded<typename traits_t::trace_type, only_coordinates>,
154  alignment_trace_matrix_full<typename traits_t::trace_type, only_coordinates>>;
155 
156  public:
158  using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
159  };
160 
164  template <typename traits_t>
165  struct select_gap_policy
166  {
167  private:
169  using score_t = typename traits_t::score_type;
171  using is_local_t = std::bool_constant<traits_t::is_local>;
172 
173  public:
175  using type = std::conditional_t<traits_t::is_vectorised,
176  deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
177  deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
178  };
179 
186  template <typename traits_t>
187  struct select_find_optimum_policy
188  {
189  private:
191  using score_t = typename traits_t::score_type;
192 
193  public:
195  using type = std::conditional_t<traits_t::is_vectorised,
196  deferred_crtp_base<simd_find_optimum_policy, score_t>,
197  deferred_crtp_base<find_optimum_policy>>;
198  };
199 
201  template <typename traits_t, typename... args_t>
202  using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
203  lazy<pairwise_alignment_algorithm_banded, args_t...>,
204  lazy<pairwise_alignment_algorithm, args_t...>>;
205 
209  template <typename config_t>
210  struct select_gap_recursion_policy
211  {
212  private:
214  using traits_type = alignment_configuration_traits<config_t>;
216  static constexpr bool with_trace = traits_type::requires_trace_information;
217 
219  using gap_recursion_policy_type = std::conditional_t<with_trace,
220  policy_affine_gap_with_trace_recursion<config_t>,
221  policy_affine_gap_recursion<config_t>>;
223  using banded_gap_recursion_policy_type =
224  std::conditional_t<with_trace,
225  policy_affine_gap_with_trace_recursion_banded<config_t>,
226  policy_affine_gap_recursion_banded<config_t>>;
227 
228  public:
230  using type =
232  };
233 
234 public:
261  template <align_pairwise_range_input sequences_t, typename config_t>
262  requires is_type_specialisation_of_v<config_t, configuration>
263  static constexpr auto configure(config_t const & cfg)
264  {
265  auto config_with_output = maybe_default_output(cfg);
266  using config_with_output_t = decltype(config_with_output);
267 
268  // ----------------------------------------------------------------------------
269  // Configure the type-erased alignment function.
270  // ----------------------------------------------------------------------------
271 
272  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
273  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
274 
275  using wrapped_first_t = type_reduce_t<first_seq_t &>;
276  using wrapped_second_t = type_reduce_t<second_seq_t &>;
277 
278  // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
279  using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
280  using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
281 
282  // Select the result type based on the sequences and the configuration.
283  using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
285  config_with_output_t>::type;
286  using alignment_result_t = alignment_result<alignment_result_value_t>;
287  using callback_on_result_t = std::function<void(alignment_result_t)>;
288  // Define the function wrapper type.
289  using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
290 
291  // Capture the alignment result type.
292  auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
293 
294  // ----------------------------------------------------------------------------
295  // Test some basic preconditions
296  // ----------------------------------------------------------------------------
297 
298  using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
299 
300  static_assert(alignment_contract_t::expects_alignment_configuration(),
301  "Alignment configuration error: "
302  "The alignment can only be configured with alignment configurations.");
303 
304  static_assert(alignment_contract_t::expects_tuple_like_value_type(),
305  "Alignment configuration error: "
306  "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
307  "exactly 2 elements.");
308 
309  static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
310  "Alignment configuration error: "
311  "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
312  "the value types of the passed sequences.");
313 
314  // ----------------------------------------------------------------------------
315  // Configure the algorithm
316  // ----------------------------------------------------------------------------
317 
318  // Use default edit distance if gaps are not set.
319  align_cfg::gap_cost_affine edit_gap_cost{};
320  auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
321  auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
322 
323  if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
324  {
325  // Only use edit distance if ...
326  auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
327  // Only use edit distance if ...
328  if (gap_cost.open_score == 0 && // gap open score is not set,
329  !(method_global_cfg.free_end_gaps_sequence2_leading
330  || method_global_cfg.free_end_gaps_sequence2_trailing)
331  && // none of the free end gaps are set for second seq,
332  (method_global_cfg.free_end_gaps_sequence1_leading
333  == method_global_cfg
334  .free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
335  {
336  // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
337  // option via the scheme.
338  if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
339  nucleotide_scoring_scheme>)
340  {
341  if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0)
342  && (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
343  {
344  return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
345  config_with_result_type};
346  }
347  }
348  }
349  }
350 
351  // ----------------------------------------------------------------------------
352  // Check if invalid configuration was used.
353  // ----------------------------------------------------------------------------
354 
355  // Do not allow min score configuration for alignments not computing the edit distance.
356  if (config_t::template exists<align_cfg::min_score>())
357  throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
358  "specific edit distance computation."};
359  // Configure the alignment algorithm.
360  return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
361  config_with_result_type};
362  }
363 
364 private:
374  template <typename config_t>
375  static constexpr auto maybe_default_output(config_t const & config) noexcept
376  {
377  using traits_t = alignment_configuration_traits<config_t>;
378 
379  if constexpr (traits_t::has_output_configuration)
380  return config;
381  else
382  return config | align_cfg::output_score{} | align_cfg::output_begin_position{}
383  | align_cfg::output_end_position{} | align_cfg::output_alignment{} | align_cfg::output_sequence1_id{}
384  | align_cfg::output_sequence2_id{};
385  }
386 
392  template <typename function_wrapper_t, typename config_t>
393  static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
394  {
395  using traits_t = alignment_configuration_traits<config_t>;
396 
397  // ----------------------------------------------------------------------------
398  // Unsupported configurations
399  // ----------------------------------------------------------------------------
400 
401  if constexpr (traits_t::is_banded)
402  throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
403 
404  // ----------------------------------------------------------------------------
405  // Configure semi-global alignment
406  // ----------------------------------------------------------------------------
407 
408  // Get the value for the sequence ends configuration.
409  auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
410 
411  auto configure_edit_traits = [&](auto is_semi_global)
412  {
413  struct edit_traits_type
414  {
415  using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
416  };
417 
418  edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
419  return function_wrapper_t{std::move(algorithm)};
420  };
421 
422  // Check if it has free ends set for the first sequence trailing gaps.
423  auto has_free_ends_trailing = [&](auto first) constexpr
424  {
425  if constexpr (!decltype(first)::value)
426  {
427  return configure_edit_traits(std::false_type{});
428  }
429  else // Resolve correct property at runtime.
430  {
431  if (method_global_cfg.free_end_gaps_sequence1_trailing)
432  return configure_edit_traits(std::true_type{});
433  else
434  return configure_edit_traits(std::false_type{});
435  }
436  };
437 
438  // Check if it has free ends set for the first sequence leading gaps.
439  if (method_global_cfg.free_end_gaps_sequence1_leading)
440  return has_free_ends_trailing(std::true_type{});
441  else
442  return has_free_ends_trailing(std::false_type{});
443  }
444 
461  template <typename function_wrapper_t, typename config_t>
462  static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
463 
478  template <typename function_wrapper_t, typename... policies_t, typename config_t>
479  static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
480  {
481  using traits_t = alignment_configuration_traits<config_t>;
482 
483  // Temporarily we will use the new and the old alignment implementation in order to
484  // refactor step-by-step to the new implementation. The new implementation will be tested in
485  // macrobenchmarks to show that it maintains a high performance.
486 
487  // Use old alignment implementation if...
488  if constexpr (traits_t::is_local || // it is a local alignment,
489  traits_t::is_debug || // it runs in debug mode,
490  traits_t::compute_sequence_alignment || // it computes more than the begin position.
491  (traits_t::is_banded && traits_t::compute_begin_positions)
492  || // banded && more than end positions.
493  (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
494  {
495  using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
496  using gap_policy_t = typename select_gap_policy<traits_t>::type;
497  using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
498  using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
499 
500  return alignment_algorithm<config_t,
501  matrix_policy_t,
502  gap_policy_t,
503  find_optimum_t,
504  gap_init_policy_t,
505  policies_t...>{cfg};
506  }
507  else // Use new alignment algorithm implementation.
508  {
509  //----------------------------------------------------------------------------------------------------------
510  // Configure the optimum tracker policy.
511  //----------------------------------------------------------------------------------------------------------
512 
513  using scalar_optimum_updater_t =
515 
516  using optimum_tracker_policy_t =
517  lazy_conditional_t<traits_t::is_vectorised,
518  lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
519  lazy<policy_optimum_tracker, config_t, scalar_optimum_updater_t>>;
520 
521  //----------------------------------------------------------------------------------------------------------
522  // Configure the gap scheme policy.
523  //----------------------------------------------------------------------------------------------------------
524 
525  using gap_cost_policy_t = typename select_gap_recursion_policy<config_t>::type;
526 
527  //----------------------------------------------------------------------------------------------------------
528  // Configure the result builder policy.
529  //----------------------------------------------------------------------------------------------------------
530 
531  using result_builder_policy_t = policy_alignment_result_builder<config_t>;
532 
533  //----------------------------------------------------------------------------------------------------------
534  // Configure the scoring scheme policy.
535  //----------------------------------------------------------------------------------------------------------
536 
537  using alignment_method_t = std::
538  conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
539 
540  using score_t = typename traits_t::score_type;
541  using scoring_scheme_t = typename traits_t::scoring_scheme_type;
542  constexpr bool is_aminoacid_scheme =
543  is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
544 
545  using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
546  lazy<simd_match_mismatch_scoring_scheme,
547  score_t,
548  typename traits_t::scoring_scheme_alphabet_type,
549  alignment_method_t>,
550  void>;
551  using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
552  lazy<simd_matrix_scoring_scheme,
553  score_t,
554  typename traits_t::scoring_scheme_alphabet_type,
555  alignment_method_t>,
556  void>;
557 
558  using alignment_scoring_scheme_t =
559  std::conditional_t<traits_t::is_vectorised,
561  scoring_scheme_t>;
562 
563  using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
564 
565  //----------------------------------------------------------------------------------------------------------
566  // Configure the alignment matrix policy.
567  //----------------------------------------------------------------------------------------------------------
568 
569  using score_matrix_t = score_matrix_single_column<score_t>;
570  using trace_matrix_t = trace_matrix_full<trace_directions>;
571 
572  using alignment_matrix_t =
573  std::conditional_t<traits_t::requires_trace_information,
574  combined_score_and_trace_matrix<score_matrix_t, trace_matrix_t>,
575  score_matrix_t>;
576  using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, alignment_matrix_t>;
577 
578  //----------------------------------------------------------------------------------------------------------
579  // Configure the final alignment algorithm.
580  //----------------------------------------------------------------------------------------------------------
581 
582  using algorithm_t = select_alignment_algorithm_t<traits_t,
583  config_t,
584  gap_cost_policy_t,
585  optimum_tracker_policy_t,
586  result_builder_policy_t,
587  scoring_scheme_policy_t,
588  alignment_matrix_policy_t>;
589  return algorithm_t{cfg};
590  }
591  }
592 };
593 
595 template <typename function_wrapper_t, typename config_t>
596 constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
597 {
598  using traits_t = alignment_configuration_traits<config_t>;
599 
600  using scoring_scheme_t = typename traits_t::scoring_scheme_type;
601  constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
602  using alignment_type_t = typename std::
603  conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
604 
605  using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
606  lazy<simd_match_mismatch_scoring_scheme,
607  typename traits_t::score_type,
608  typename traits_t::scoring_scheme_alphabet_type,
609  alignment_type_t>,
610  void>;
611  using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
612  lazy<simd_matrix_scoring_scheme,
613  typename traits_t::score_type,
614  typename traits_t::scoring_scheme_alphabet_type,
615  alignment_type_t>,
616  void>;
617 
618  using alignment_scoring_scheme_t =
619  std::conditional_t<traits_t::is_vectorised,
621  scoring_scheme_t>;
622 
623  using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
624  return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
625 }
627 } // namespace seqan3::detail
Provides seqan3::detail::affine_gap_init_policy.
Provides seqan3::detail::affine_gap_policy.
Provides configuration for alignment output.
Provides seqan3::align_cfg::detail::result_type.
Provides seqan3::detail::align_result_selector.
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::alignment_algorithm.
Provides seqan3::detail::alignment_matrix_policy.
Provides seqan3::alignment_result.
Provides seqan3::detail::alignment_score_matrix_one_column.
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
Provides seqan3::detail::alignment_trace_matrix_full.
Provides seqan3::detail::alignment_trace_matrix_full_banded.
Provides seqan3::aminoacid_scoring_scheme.
Provides seqan3::detail::combined_score_and_trace_matrix.
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::detail::edit_distance_algorithm.
Provides seqan3::detail::find_optimum_policy.
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
A concept that requires that type be able to score two letters.
Whether a type behaves like a tuple.
Provides lazy template instantiation traits.
Provides seqan3::nucleotide_scoring_scheme.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::policy_affine_gap_recursion.
Provides seqan3::detail::policy_affine_gap_recursion_banded.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion_banded.
Provides seqan3::detail::policy_alignment_matrix.
Provides seqan3::detail::policy_alignment_result_builder.
Provides seqan3::detail::policy_optimum_tracker.
Provides seqan3::detail::policy_optimum_tracker_simd.
Provides seqan3::detail::policy_scoring_scheme.
Provides seqan3::detail::score_matrix_single_column.
Provides seqan3::detail::scoring_scheme_policy.
Provides seqan3::simd::simd_type.
Provides seqan3::detail::simd_affine_gap_policy.
Provides seqan3::detail::simd_find_optimum_policy.
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
Provides seqan3::detail::simd_matrix_scoring_scheme.
Provides type traits for working with templates.
Provides seqan3::detail::trace_matrix_full.
Provides seqan3::views::type_reduce.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.