libpappsomspp
Library for mass spectrometry
protein.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/protein/protein.cpp
3 * \date 2/7/2015
4 * \author Olivier Langella
5 * \brief object to handle a protein
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include "protein.h"
32#include "../peptide/peptide.h"
33#include <QStringList>
34#include <algorithm>
35#include "../pappsoexception.h"
36
37namespace pappso
38{
39
40QRegularExpression Protein::m_removeTranslationStopRegExp("\\*$");
41
42/*
43 * http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml
44 */
45// For those programs that use amino acid query sequences (BLASTP and
46// TBLASTN), the accepted amino acid codes are:
47//
48// A alanine P proline
49// B aspartate/asparagine Q glutamine
50// C cysteine R arginine
51// D aspartate S serine
52// E glutamate T threonine
53// F phenylalanine U selenocysteine
54// G glycine V valine
55// H histidine W tryptophan
56// I isoleucine Y tyrosine
57// K lysine Z glutamate/glutamine
58// L leucine X any
59// M methionine * translation stop
60// N asparagine - gap of indeterminate length
61
// NOTE(review): the line carrying this definition's signature (listing line
// 62) is absent from this extract. Presumably this empty body belongs to the
// default constructor Protein::Protein() - confirm against the original file.
63{
64}
65Protein::Protein(const QString &description, const QString &sequence)
66 : m_description(description.simplified()),
67 m_accession(m_description.split(" ").at(0)),
68 m_sequence(sequence)
69{
70 m_description = m_description.remove(0, m_accession.size()).simplified();
71 // m_sequence.replace(m_removeTranslationStopRegExp, "");
72 m_length = m_sequence.size();
73}
75 : m_description(protein.m_description),
76 m_accession(protein.m_accession),
77 m_sequence(protein.m_sequence),
78 m_length(protein.m_length)
79{
80}
81
82Protein &
84{
86 return (*this);
87}
88
89Protein &
91{
92 std::reverse(m_sequence.begin(), m_sequence.end());
93 return (*this);
94}
95
98{
99 return std::make_shared<Protein>(*this);
100}
101
102
103bool
104Protein::operator==(const Protein &other) const
105{
106 return (m_accession == other.m_accession);
107}
108
109void
110Protein::setSequence(const QString &sequence)
111{
112 m_sequence = sequence.simplified();
113 m_length = m_sequence.size();
114}
115unsigned int
117{
118 return m_length;
119}
120
121const QString &
123{
124 return m_sequence;
125}
126const QString &
128{
129 return m_accession;
130}
131void
132Protein::setAccession(const QString &accession)
133{
134 m_accession = accession.simplified();
135}
136const QString &
138{
139 return m_description;
140}
141void
142Protein::setDescription(const QString &description)
143{
144 m_description = description.simplified();
145}
// NOTE(review): the line carrying this definition's signature (listing line
// 146) is absent from this extract. Presumably this empty body belongs to the
// destructor Protein::~Protein() - confirm against the original file.
147{
148}
151{
152 try
153 {
154 // qDebug() << "ProteinXtp::getMass() begin " <<
155 // getOnlyAminoAcidSequence().replace("[BZX]","E");
156 // replace amino acid wildcard by E, just to give an random mass (assumed
157 // it is not perfect)
158 QString sequence(m_sequence);
159 sequence.replace(QRegularExpression("[^WGASPVTLINDKQEMHFRCYUBZX]"), "");
160 pappso::Peptide peptide(
161 sequence.replace(QRegularExpression("[BZX]"), "E"));
162 return peptide.getMass();
163 }
164 catch(pappso::PappsoException &error)
165 {
167 QObject::tr("Error computing mass for protein %1 :\n%2")
168 .arg(getAccession())
169 .arg(error.qwhat()));
170 }
171}
172} // namespace pappso
virtual const QString & qwhat() const
pappso_double getMass()
Definition: peptide.cpp:207
const QString & getAccession() const
Definition: protein.cpp:127
const QString & getDescription() const
Definition: protein.cpp:137
Protein & removeTranslationStop()
remove * characters at the end of the sequence
Definition: protein.cpp:83
unsigned int m_length
number of amino acid
Definition: protein.h:60
pappso_double getMass() const
get monoisotopic mass of ProteinSp Protein::makeProteinSp() const
Definition: protein.cpp:150
QString m_sequence
the amino acid sequence
Definition: protein.h:58
static QRegularExpression m_removeTranslationStopRegExp
Definition: protein.h:62
unsigned int size() const
protein amino acid sequence size
Definition: protein.cpp:116
Protein & reverse()
reverse characters in the sequence
Definition: protein.cpp:90
QString m_accession
a single unique identifier of the protein (usually the first word of description)
Definition: protein.h:56
bool operator==(const Protein &other) const
Definition: protein.cpp:104
QString m_description
free text to describe the protein
Definition: protein.h:53
ProteinSp makeProteinSp() const
Definition: protein.cpp:97
void setSequence(const QString &sequence)
Definition: protein.cpp:110
const QString & getSequence() const
Definition: protein.cpp:122
void setDescription(const QString &description)
Definition: protein.cpp:142
virtual void setAccession(const QString &accession)
Definition: protein.cpp:132
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition: protein.h:47
object to handle a protein