libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aamodification.h
3 * \date 7/3/2015
4 * \author Olivier Langella
5 * \brief amino acid modification model
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include <QRegularExpression>
32#include <QDebug>
33#include <cmath>
34
35#include "aamodification.h"
36#include "aa.h"
37#include "../pappsoexception.h"
38#include "../mzrange.h"
39#include "../peptide/peptide.h"
40#include "../obo/filterobopsimodsink.h"
41#include "../obo/filterobopsimodtermaccession.h"
42#include "../exception/exceptionnotfound.h"
43
44/*
45
46inline void initMyResource() {
47 Q_INIT_RESOURCE(resources);
48}
49*/
50
51namespace pappso
52{
53
55
56AaModification::AaModification(const QString &accession, pappso_double mass)
57 : m_accession(accession), m_mass(mass)
58{
64
66 {Isotope::H2, 0},
67 {Isotope::N15, 0},
68 {Isotope::O17, 0},
69 {Isotope::O18, 0},
70 {Isotope::S33, 0},
71 {Isotope::S34, 0},
72 {Isotope::S36, 0}};
73}
74
75
77 : m_accession(toCopy.m_accession),
78 m_name(toCopy.m_name),
79 m_mass(toCopy.m_mass),
80 m_atomCount(std::move(toCopy.m_atomCount)),
81 m_mapIsotope(toCopy.m_mapIsotope)
82{
83 m_origin = toCopy.m_origin;
84}
85
87{
88}
89
90const QString &
92{
93
94 qDebug();
95 return m_accession;
96}
97const QString &
99{
100 return m_name;
101}
104 MapAccessionModifications ret;
105
106 return ret;
107 }();
108
111{
112 AaModification *new_mod;
113 // qDebug() << " AaModification::createInstance begin";
114 new_mod = new AaModification(term.m_accession, term.m_diffMono);
115 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
116 new_mod->setDiffFormula(term.m_diffFormula);
117 new_mod->setXrefOrigin(term.m_origin);
118 new_mod->m_name = term.m_name;
119 return new_mod;
120}
121
123AaModification::createInstance(const QString &accession)
124{
125 if(accession == "internal:Nter_hydrolytic_cleavage_H")
126 {
127 OboPsiModTerm term;
128 term.m_accession = accession;
129 term.m_diffFormula = "H 1";
130 term.m_diffMono = MPROTIUM;
131 term.m_name = "Nter hydrolytic cleavage H+";
132 return (AaModification::createInstance(term));
133 }
134 if(accession == "internal:Cter_hydrolytic_cleavage_HO")
135 {
136 OboPsiModTerm term;
137 term.m_accession = accession;
138 term.m_diffFormula = "H 1 O 1";
140 term.m_name = "Cter hydrolytic cleavage HO";
141 return (AaModification::createInstance(term));
142 }
143 if(accession.startsWith("MUTATION:"))
144 {
145 QRegularExpression regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
146 QRegularExpressionMatch match = regexp_mutation.match(accession);
147 if(match.hasMatch())
148 {
149 qDebug() << match.capturedTexts()[1].at(0) << " "
150 << match.capturedTexts()[2].at(0);
151
152 Aa aa_from(match.capturedTexts()[1].toStdString().c_str()[0]);
153 Aa aa_to(match.capturedTexts()[2].toStdString().c_str()[0]);
154 AaModificationP instance_mutation =
155 createInstanceMutation(aa_from, aa_to);
156 return instance_mutation;
157 // m_psiModLabel<<"|";
158 }
159 }
160 // initMyResource();
161 FilterOboPsiModSink term_list;
162 FilterOboPsiModTermAccession filterm_accession(term_list, accession);
163
164 OboPsiMod psimod(filterm_accession);
165
166 try
167 {
168 return (AaModification::createInstance(term_list.getOne()));
169 }
170 catch(ExceptionNotFound &e)
171 {
172 throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
173 .arg(accession)
174 .arg(e.qwhat()));
175 }
176}
177
178void
179AaModification::setXrefOrigin(const QString &origin)
180{
181 // xref: Origin: "N"
182 // xref: Origin: "X"
183 m_origin = origin;
184}
185void
186AaModification::setDiffFormula(const QString &diff_formula)
187{
188 QRegularExpression rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
189 QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);
190
191 while(i.hasNext())
192 {
193 QRegularExpressionMatch match = i.next();
194
195 qDebug() << match.captured(2) << " " << match.captured(2) << " "
196 << match.captured(3);
197
198 if(match.captured(2) == "C")
199 {
200 m_atomCount[AtomIsotopeSurvey::C] = match.captured(3).toInt();
201 }
202 else if(match.captured(2) == "H")
203 {
204 m_atomCount[AtomIsotopeSurvey::H] = match.captured(3).toInt();
205 }
206 else if(match.captured(2) == "N")
207 {
208 m_atomCount[AtomIsotopeSurvey::N] = match.captured(3).toInt();
209 }
210 else if(match.captured(2) == "O")
211 {
212 m_atomCount[AtomIsotopeSurvey::O] = match.captured(3).toInt();
213 }
214 else if(match.captured(2) == "S")
215 {
216 m_atomCount[AtomIsotopeSurvey::S] = match.captured(3).toInt();
217 }
218 }
219
220 // look for isotopes :
221 rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
222
223 i = rx.globalMatch(diff_formula);
224
225 while(i.hasNext())
226 {
227 QRegularExpressionMatch match = i.next();
228
229 qDebug() << match.captured(1) << " " << match.captured(2) << " "
230 << match.captured(3);
231
232 int number_of_isotopes = match.captured(3).toInt();
233
234 if(match.captured(2) == "C")
235 {
236 if(match.captured(1) == "13")
237 {
238 m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
239 }
240 m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
241 }
242 else if(match.captured(2) == "H")
243 {
244 if(match.captured(1) == "2")
245 {
246 m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
247 }
248 m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
249 }
250 else if(match.captured(2) == "N")
251 {
252 if(match.captured(1) == "15")
253 {
254 m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
255 }
256 m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
257 }
258 else if(match.captured(2) == "O")
259 {
260 if(match.captured(1) == "17")
261 {
262 m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
263 }
264 else if(match.captured(1) == "18")
265 {
266 m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
267 }
268 m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
269 }
270 else if(match.captured(2) == "S")
271 {
272 if(match.captured(1) == "33")
273 {
274 m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
275 }
276 else if(match.captured(1) == "34")
277 {
278 m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
279 }
280 else if(match.captured(1) == "36")
281 {
282 m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
283 }
284 m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
285 }
286 }
287
289}
290
291
292void
294{
295 pappso_double theoreticalm_mass = 0;
296 std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
298 if(it_atom != m_atomCount.end())
299 {
300 theoreticalm_mass += MASSCARBON * (it_atom->second);
301 }
302 it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
303 if(it_atom != m_atomCount.end())
304 {
305 theoreticalm_mass += MPROTIUM * (it_atom->second);
306 }
307
308 it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
309 if(it_atom != m_atomCount.end())
310 {
311 theoreticalm_mass += MASSOXYGEN * (it_atom->second);
312 }
313
314 it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
315 if(it_atom != m_atomCount.end())
316 {
317 theoreticalm_mass += MASSNITROGEN * (it_atom->second);
318 }
319 it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
320 if(it_atom != m_atomCount.end())
321 {
322 theoreticalm_mass += MASSSULFUR * (it_atom->second);
323 }
324
325 qDebug() << theoreticalm_mass;
326
327 theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
328 theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
329 theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
330 theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
331 theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
332 theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
333 theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
334 theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
335
336
337 pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
338 if(diff < 0.001)
339 {
340 m_mass = theoreticalm_mass;
341 qDebug() << diff;
342 }
343 else
344 {
345 qDebug()
346 << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
347 << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
348 << " accession=" << m_accession;
349 }
350}
351
354{
355 QString accession = QString("%1").arg(modificationMass);
356 qDebug() << accession;
357 QMutexLocker locker(&m_mutex);
358 if(m_mapAccessionModifications.find(accession) ==
360 {
361 // not found
362 m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
363 accession, new AaModification(accession, modificationMass)));
364 }
365 else
366 {
367 // found
368 }
369 return m_mapAccessionModifications.at(accession);
370}
371
373AaModification::getInstance(const QString &accession)
374{
375 try
376 {
377 QMutexLocker locker(&m_mutex);
378 MapAccessionModifications::iterator it =
379 m_mapAccessionModifications.find(accession);
380 if(it == m_mapAccessionModifications.end())
381 {
382
383 // not found
384 std::pair<MapAccessionModifications::iterator, bool> insert_res =
386 std::pair<QString, AaModificationP>(
387 accession, AaModification::createInstance(accession)));
388 it = insert_res.first;
389 }
390 else
391 {
392 // found
393 }
394 return it->second;
395 }
396 catch(ExceptionNotFound &e)
397 {
398 throw ExceptionNotFound(
399 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
400 .arg(accession)
401 .arg(e.qwhat()));
402 }
403 catch(PappsoException &e)
404 {
405 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
406 .arg(accession)
407 .arg(e.qwhat()));
408 }
409 catch(std::exception &e)
410 {
411 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
412 .arg(accession)
413 .arg(e.what()));
414 }
415}
416
419{
420
421 QMutexLocker locker(&m_mutex);
422 MapAccessionModifications::iterator it =
424 if(it == m_mapAccessionModifications.end())
425 {
426 // not found
427 std::pair<MapAccessionModifications::iterator, bool> insert_res =
428 m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
430 it = insert_res.first;
431 }
432 else
433 {
434 // found
435 }
436 return it->second;
437}
438
439
442 pappso_double mass,
443 const PeptideSp &peptide_sp,
444 unsigned int position)
445{
447 if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
448 {
449 if(type == "M")
450 {
451 return getInstance("MOD:00719");
452 }
453 if(type == "K")
454 {
455 return getInstance("MOD:01047");
456 }
457 }
458 // accession== "MOD:00057"
459 if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
460 {
461 // id: MOD:00394
462 // name: acetylated residue
463 // potential N-terminus modifications
464 if(position == 0)
465 {
466 return getInstance("MOD:00408");
467 }
468 }
469 if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
470 {
471 //-17.02655
472 // loss of ammonia [MOD:01160] -17.026549
473 return getInstance("MOD:01160");
474 }
475
476 if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
477 {
478 //// iodoacetamide [MOD:00397] 57.021464
479 if(type == "C")
480 {
481 return getInstance("MOD:01060");
482 }
483 else
484 {
485 return getInstance("MOD:00397");
486 }
487 }
488 if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
489 {
490 // loss of water
491 /*
492 if (position == 0) {
493 if (peptide_sp.get()->getSequence().startsWith("EG")) {
494 return getInstance("MOD:00365");
495 }
496 if (peptide_sp.get()->getSequence().startsWith("ES")) {
497 return getInstance("MOD:00953");
498 }
499 if (type == "E") {
500 return getInstance("MOD:00420");
501 }
502 }
503 */
504 // dehydrated residue [MOD:00704] -18.010565
505 return getInstance("MOD:00704");
506 }
507 if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
508 {
509 // phosphorylated residue [MOD:00696] 79.966330
510 return getInstance("MOD:00696");
511 }
512 bool isCter = false;
513 if(peptide_sp.get()->size() == (position + 1))
514 {
515 isCter = true;
516 }
517 if((position == 0) || isCter)
518 {
519 if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
520 {
521 // dimethyl
522 return getInstance("MOD:00429");
523 }
524 if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
525 {
526 // 4x(2)H labeled dimethyl residue
527 return getInstance("MOD:00552");
528 }
529 if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
530 {
531 // 2x(13)C,6x(2)H-dimethylated arginine
532 return getInstance("MOD:00638");
533 }
534 }
535 throw PappsoException(
536 QObject::tr("tandem modification not found : %1 %2 %3 %4")
537 .arg(type)
538 .arg(mass)
539 .arg(peptide_sp.get()->getSequence())
540 .arg(position));
541}
542
545{
546 return m_mass;
547}
548
549
550int
552{
553 // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
554 // IMPLEMENTED";
555 return m_atomCount.at(atom);
556}
557
558
559int
561{
562 try
563 {
564 return m_mapIsotope.at(isotope);
565 }
566 catch(std::exception &e)
567 {
568 throw PappsoException(
569 QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
570 .arg(e.what()));
571 }
572}
573
574
575bool
577{
578 if(m_accession.startsWith("internal:"))
579 {
580 return true;
581 }
582 return false;
583}
584
586AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
587{
588 QString accession(
589 QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
590 double diffMono = aa_to.getMass() - aa_from.getMass();
591 // not found
592 AaModification *instance_mutation;
593 // qDebug() << " AaModification::createInstance begin";
594 instance_mutation = new AaModification(accession, diffMono);
595 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
596
597 for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
598 atomInt != (std::int8_t)AtomIsotopeSurvey::last;
599 atomInt++)
600 {
601 AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
602 instance_mutation->m_atomCount[atom] =
603 aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
604 }
605 instance_mutation->m_name = QString("mutation from %1 to %2")
606 .arg(aa_from.getLetter())
607 .arg(aa_to.getLetter());
608 return instance_mutation;
609}
610
611
613AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
614{
615 QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
616 try
617 {
618 QMutexLocker locker(&m_mutex);
619 MapAccessionModifications::iterator it =
620 m_mapAccessionModifications.find(accession);
621 if(it == m_mapAccessionModifications.end())
622 {
623 Aa aa_from(mut_from.toLatin1());
624 Aa aa_to(mut_to.toLatin1());
625 AaModificationP instance_mutation =
626 createInstanceMutation(aa_from, aa_to);
627
628 std::pair<MapAccessionModifications::iterator, bool> insert_res =
630 std::pair<QString, AaModificationP>(accession,
631 instance_mutation));
632 it = insert_res.first;
633 }
634 else
635 {
636 // found
637 }
638 return it->second;
639 }
640 catch(ExceptionNotFound &e)
641 {
642 throw ExceptionNotFound(
643 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
644 .arg(accession)
645 .arg(e.qwhat()));
646 }
647 catch(PappsoException &e)
648 {
649 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
650 .arg(accession)
651 .arg(e.qwhat()));
652 }
653 catch(std::exception &e)
654 {
655 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
656 .arg(accession)
657 .arg(e.what()));
658 }
659} // namespace pappso
660
661} // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:434
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
const char * what() const noexcept override
virtual const QString & qwhat() const
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:77
double pappso_double
A type definition for doubles.
Definition: types.h:49
Isotope
Definition: types.h:92
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)