libpappsomspp
Library for mass spectrometry
timsdata.h
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/vendors/tims/timsdata.h
3  * \date 27/08/2019
4  * \author Olivier Langella
5  * \brief main Tims data handler
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #pragma once
29 
30 #include <QDir>
31 #include <QSqlDatabase>
32 #include "timsbindec.h"
33 #include "timsframe.h"
34 #include "../../massspectrum/qualifiedmassspectrum.h"
35 #include "../../processing/filters/filterinterface.h"
36 #include "../../msrun/xiccoord/xiccoordtims.h"
37 #include "../../msrun/msrunreader.h"
38 #include <deque>
39 #include <QMutex>
40 #include <QSqlQuery>
42 
43 namespace pappso
44 {
45 
46 class TimsData;
47 
48 /** \brief shared pointer on a TimsData object
49  */
50 typedef std::shared_ptr<TimsData> TimsDataSp;
51 
52 /**
53  * @todo write docs
54  */
56 {
57  public:
58  /** @brief build using the tims data directory
59  */
60  TimsData(QDir timsDataDirectory);
61 
62  /**
63  * Copy constructor
64  *
65  * @param other TODO
66  */
67  TimsData(const TimsData &other);
68 
69  /**
70  * Destructor
71  */
72  virtual ~TimsData();
73 
74 
75  /** @brief get a mass spectrum given its spectrum index
76  * @param raw_index a number beginning at 0, corresponding to a Tims Scan in
77  * the order they lie in the binary data file
78  */
80  getMassSpectrumCstSPtrByRawIndex(std::size_t raw_index);
81 
82  /** @brief get a mass spectrum given the tims frame database id and scan
83  * number within tims frame
84  */
85  pappso::MassSpectrumCstSPtr getMassSpectrumCstSPtr(std::size_t timsId,
86  std::size_t scanNum);
87 
88  /** @brief get the total number of scans
89  */
90  std::size_t getTotalNumberOfScans() const;
91 
92  /** @brief get the number of precursors analyzed by PASEF
93  */
94  std::size_t getTotalNumberOfPrecursors() const;
95 
96  /** @brief guess possible precursor ids given a charge, m/z, retention time
97  * and k0
98  * @return a list of possible precursor ids
99  */
100  std::vector<std::size_t> getPrecursorsFromMzRtCharge(int charge,
101  double mz_val,
102  double rt_sec,
103  double k0);
104 
105  unsigned int getMsLevelBySpectrumIndex(std::size_t spectrum_index);
106 
107  void getQualifiedMassSpectrumByRawIndex(const MsRunIdCstSPtr &msrun_id,
108  QualifiedMassSpectrum &mass_spectrum,
109  std::size_t spectrum_index,
110  bool want_binary_data);
111 
112  Trace getTicChromatogram();
113 
115  {
116  std::size_t parent_frame = 0;
117  std::size_t precursor_id = 0;
118  std::size_t scan_mobility_start = 0;
119  std::size_t scan_mobility_end = 0;
120  std::size_t ms1_index = 0;
121  std::size_t ms2_index = 0;
122  double isolationMz = 0;
123  double isolationWidth = 0;
124  float collisionEnergy = 0;
125  std::vector<std::size_t> tims_frame_list;
127  };
128 
129  void
130  getQualifiedMs2MassSpectrumByPrecursorId(const MsRunIdCstSPtr &msrun_id,
131  QualifiedMassSpectrum &mass_spectrum,
132  SpectrumDescr &spectrum_descr,
133  bool want_binary_data);
134 
135  void
136  getQualifiedMs1MassSpectrumByPrecursorId(const MsRunIdCstSPtr &msrun_id,
137  QualifiedMassSpectrum &mass_spectrum,
138  SpectrumDescr &spectrum_descr,
139  bool want_binary_data);
140 
141  /** @brief filter interface to apply just after raw MS2 spectrum extraction
142  * the filter can be a list of filters inside a FilterSuite object
143  */
144  void setMs2FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
145 
146  /** @brief filter interface to apply just after raw MS1 spectrum extraction
147  * the filter can be a list of filters inside a FilterSuite object
148  */
149  void setMs1FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
150 
151  /** @brief enable or disable simple centroid filter on raw tims data for MS2
152  */
153  void setMs2BuiltinCentroid(bool centroid);
154 
155 
156  /** @brief tells if simple centroid filter on raw tims data for MS2 is enabled
157  * or not
158  */
159  bool getMs2BuiltinCentroid() const;
160 
161 
162  std::vector<std::size_t> getTimsMS1FrameIdRange(double rt_begin,
163  double rt_end) const;
164 
165 
166  /** @brief get a Tims frame by its database ID
167  */
168  TimsFrameCstSPtr getTimsFrameCstSPtr(std::size_t timsId);
169 
170  XicCoordTims getXicCoordTimsFromPrecursorId(std::size_t precursor_id,
171  PrecisionPtr precision_ptr);
172 
173 
174  /** @brief function to visit an MsRunReader and get each Spectrum in a
175  * spectrum collection handler by Ms Levels
176  *
177  * this function will retrieve processed qualified spectrum depending on each
178  * Bruker precursors
179  */
180  void ms2ReaderSpectrumCollectionByMsLevel(
181  const MsRunIdCstSPtr &msrun_id,
183  unsigned int ms_level);
184 
185 
186  /** @brief function to visit an MsRunReader and get each raw Spectrum in a
187  * spectrum collection handler by Ms Levels
188  *
189  * this function will retrieve every scans as a qualified mass spectrum
190  */
191  void rawReaderSpectrumCollectionByMsLevel(
192  const MsRunIdCstSPtr &msrun_id,
194  unsigned int ms_level);
195 
196  /** @brief get cumulated raw signal for a given precursor
197  * only to use to see the raw signal
198  *
199  * @param precursor_index precursor index to extract signal from
200  * @result a map of integers, x=time of flights, y= intensities
201  */
202  std::map<quint32, quint32>
203  getRawMs2ByPrecursorId(std::size_t precursor_index);
204 
205  /** @brief get raw signal for a spectrum index
206  * only to use to see the raw signal
207  *
208  * @param spectrum_index spectrum index
209  * @result a map of integers, x=time of flights, y= intensities
210  */
211  std::map<quint32, quint32>
212  getRawMsBySpectrumIndex(std::size_t spectrum_index);
213 
214 
215  /** @brief retention timeline
216  * get retention times along the MSrun in seconds
217  * @return vector of retention times (seconds)
218  */
219  virtual std::vector<double> getRetentionTimeLine() const;
220 
221  /** @brief get an intermediate structure describing a spectrum
222  */
223  SpectrumDescr getSpectrumDescrWithPrecursorId(std::size_t precursor_id);
224 
225 
226  private:
227  SpectrumDescr getSpectrumDescrWithScanCoordinate(
228  const std::pair<std::size_t, std::size_t> &scan_coordinate);
229 
230 
231  std::pair<std::size_t, std::size_t>
232  getScanCoordinateFromRawIndex(std::size_t spectrum_index) const;
233 
234  std::size_t getRawIndexFromCoordinate(std::size_t frame_id,
235  std::size_t scan_num) const;
236 
237  QSqlDatabase openDatabaseConnection() const;
238 
239 
240  /** @brief get a Tims frame base (no binary data file access) by its
241  * database ID
242  */
243  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtr(std::size_t timsId);
244 
245 
246  /** @brief get a Tims frame by its database ID
247  * but look in the cache first
248  */
249  TimsFrameCstSPtr getTimsFrameCstSPtrCached(std::size_t timsId);
250 
251  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtrCached(std::size_t timsId);
252 
253 
254  std::vector<std::size_t>
255  getMatchPrecursorIdByKo(std::vector<std::vector<double>> ids,
256  double ko_value);
257 
258  /** @todo documentation
259  */
260  std::vector<std::size_t>
261  getClosestPrecursorIdByMz(std::vector<std::vector<double>> ids,
262  double mz_value);
263 
264 
265  /** @brief private function to fill m_frameIdDescrList
266  */
267  void fillFrameIdDescrList();
268 
269 
270  private:
271  void
272  ms2ReaderGenerateMS1MS2Spectrum(const MsRunIdCstSPtr &msrun_id,
274  SpectrumDescr &spectrum_descr,
275  unsigned int ms_level);
276 
277  void fillSpectrumDescriptionWithSqlRecord(SpectrumDescr &spectrum_descr,
278  QSqlQuery &qprecursor_list);
279 
280  private:
282  TimsBinDec *mpa_timsBinDec = nullptr;
283  // QSqlDatabase *mpa_qdb = nullptr;
284  std::size_t m_totalNumberOfScans;
286  std::size_t m_cacheSize = 60;
287  std::deque<TimsFrameCstSPtr> m_timsFrameCache;
288  std::deque<TimsFrameBaseCstSPtr> m_timsFrameBaseCache;
289 
290  pappso::FilterInterfaceCstSPtr mcsp_ms2Filter = nullptr;
291  pappso::FilterInterfaceCstSPtr mcsp_ms1Filter = nullptr;
292 
293  /** @brief enable builtin centroid on raw tims integers by default
294  */
295  bool m_builtinMs2Centroid = true;
296 
297 
298  std::map<int, QSqlRecord> m_mapMzCalibrationRecord;
299  std::map<int, QSqlRecord> m_mapTimsCalibrationRecord;
300  std::map<std::size_t, QSqlRecord> m_mapFramesRecord;
301  std::map<std::size_t, QSqlRecord> m_mapXicCoordRecord;
302 
304 
305 
307  {
308  std::size_t m_frameId; // frame id
309  std::size_t m_size; // frame size (number of TOF scans in frame)
310  std::size_t m_cumulSize; // cumulative size
311  };
312 
313 
314  /** @brief store every frame id and corresponding sizes
315  */
316  std::vector<FrameIdDescr> m_frameIdDescrList;
317 
318  /** @brief index to find quickly a frameId in the description list with the
319  * raw index of spectrum modulo 1000
320  * @key thousands of TOF scans
321  * @value corresponding m_frameIdDescrList index
322  */
323  std::map<std::size_t, std::size_t> m_thousandIndexToFrameIdDescrListIndex;
324 
325  QMutex m_mutex;
326 };
327 } // namespace pappso
Class representing a fully specified mass spectrum.
interface to collect spectrums from the MsRunReader class
Definition: msrunreader.h:56
std::vector< FrameIdDescr > m_frameIdDescrList
store every frame id and corresponding sizes
Definition: timsdata.h:316
std::map< std::size_t, QSqlRecord > m_mapFramesRecord
Definition: timsdata.h:300
std::map< std::size_t, QSqlRecord > m_mapXicCoordRecord
Definition: timsdata.h:301
std::size_t m_totalNumberOfScans
Definition: timsdata.h:284
std::deque< TimsFrameCstSPtr > m_timsFrameCache
Definition: timsdata.h:287
std::map< int, QSqlRecord > m_mapMzCalibrationRecord
Definition: timsdata.h:298
std::map< int, QSqlRecord > m_mapTimsCalibrationRecord
Definition: timsdata.h:299
QMutex m_mutex
Definition: timsdata.h:325
TimsData(const TimsData &other)
QDir m_timsDataDirectory
Definition: timsdata.h:281
MzCalibrationStore * mpa_mzCalibrationStore
Definition: timsdata.h:303
std::deque< TimsFrameBaseCstSPtr > m_timsFrameBaseCache
Definition: timsdata.h:288
std::map< std::size_t, std::size_t > m_thousandIndexToFrameIdDescrListIndex
index to find quickly a frameId in the description list with the raw index of spectrum modulo 1000 @k...
Definition: timsdata.h:323
std::size_t m_totalNumberOfPrecursors
Definition: timsdata.h:285
A simple container of DataPoint instances.
Definition: trace.h:148
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const TimsFrameBase > TimsFrameBaseCstSPtr
Definition: timsframebase.h:41
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
std::shared_ptr< TimsData > TimsDataSp
shared pointer on a TimsData object
Definition: timsdata.h:46
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< const FilterInterface > FilterInterfaceCstSPtr
std::shared_ptr< const TimsFrame > TimsFrameCstSPtr
Definition: timsframe.h:43
std::vector< std::size_t > tims_frame_list
Definition: timsdata.h:125
PrecursorIonData precursor_ion_data
Definition: timsdata.h:126
coordinates of the XIC to extract and the resulting XIC after extraction
Definition: xiccoordtims.h:51
binary file handler of Bruker's TimsTof raw data
handle a single Bruker's TimsTof frame