libpappsomspp
Library for mass spectrometry
mzintegrationparams.cpp
Go to the documentation of this file.
1/* BEGIN software license
2 *
3 * msXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2018 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the msXpertSuite project.
10 *
11 * The msXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34/////////////////////// StdLib includes
35#include <map>
36#include <cmath>
37
38
39/////////////////////// Qt includes
40#include <QDebug>
41#include <QString>
42#include <QFile>
43#include <QDateTime>
44
45
46/////////////////////// pappsomspp includes
47#include "../../utils.h"
48#include "../../massspectrum/massspectrum.h"
49
50
51/////////////////////// Local includes
52#include "mzintegrationparams.h"
53
54
56 qRegisterMetaType<pappso::MzIntegrationParams>("pappso::MzIntegrationParams");
58 qRegisterMetaType<pappso::MzIntegrationParams *>(
59 "pappso::MzIntegrationParams *");
60
61
62namespace pappso
63{
64
65
66//! Map relating the BinningType to a textual representation
67std::map<BinningType, QString> binningTypeMap{
68 {BinningType::NONE, "NONE"},
69 {BinningType::DATA_BASED, "DATA_BASED"},
70 {BinningType::ARBITRARY, "ARBITRARY"}};
71
72
74{
77}
78
79
82 BinningType binningType,
83 int decimalPlaces,
84 pappso::PrecisionPtr precisionPtr,
85 bool applyMzShift,
87 bool removeZeroValDataPoints)
88 : m_smallestMz(minMz),
89 m_greatestMz(maxMz),
90 m_binningType(binningType),
91 m_decimalPlaces(decimalPlaces),
92 mp_precision(precisionPtr),
93 m_applyMzShift(applyMzShift),
94 m_mzShift(mzShift),
95 m_removeZeroValDataPoints(removeZeroValDataPoints)
96{
97 if(mp_precision == nullptr)
99}
100
101
103 : m_smallestMz(other.m_smallestMz),
104 m_greatestMz(other.m_greatestMz),
105 m_binningType(other.m_binningType),
106 m_decimalPlaces(other.m_decimalPlaces),
107 mp_precision(other.mp_precision),
108 m_applyMzShift(other.m_applyMzShift),
109 m_mzShift(other.m_mzShift),
110 m_removeZeroValDataPoints(other.m_removeZeroValDataPoints)
111{
112 if(mp_precision == nullptr)
114}
115
116
118{
119}
120
121
124{
125 if(this == &other)
126 return *this;
127
131
133
135 if(mp_precision == nullptr)
137
139 m_mzShift = other.m_mzShift;
141
142 return *this;
143}
144
145
146void
148{
149 m_smallestMz = value;
150}
151
152
153void
155{
156 m_smallestMz = m_smallestMz > value ? value : m_smallestMz;
157}
158
159
162{
163 return m_smallestMz;
164}
165
166
167void
169{
170 m_greatestMz = value;
171}
172
173
174void
176{
177 m_greatestMz = m_greatestMz < value ? value : m_greatestMz;
178}
179
180
183{
184 return m_greatestMz;
185}
186
187void
189{
190 m_binningType = binningType;
191}
192
195{
196 return m_binningType;
197}
198
199void
201{
202 m_decimalPlaces = decimal_places;
203}
204
205
206int
208{
209 return m_decimalPlaces;
210}
211
212void
214{
215 mp_precision = precisionPtr;
216
217 if(mp_precision == nullptr)
219}
220
223{
224 return mp_precision;
225}
226
227
228void
230{
231 m_applyMzShift = applyMzShift;
232}
233
234
235bool
237{
238 return m_applyMzShift;
239}
240
241
242void
244{
245 m_removeZeroValDataPoints = removeOrNot;
246}
247
248
249bool
251{
253}
254
255
256void
258{
259 m_mzShift = value;
260}
261
262
263double
265{
266 return m_mzShift;
267}
268
269
270//! Reset the instance to default values.
// NOTE(review): both m/z limits are reset to numeric_limits<double>::min()
// below. Elsewhere in this file the "unset" marker tested for m_smallestMz is
// numeric_limits<double>::max() (see the validity checks and toString()),
// while min() is only the marker for m_greatestMz. As written, a freshly
// reset instance therefore does not look "unset" to those checks for the
// lower limit, and the update logic that keeps the smaller of the stored and
// the new value can never raise m_smallestMz above min(). Presumably the
// first assignment should use max() -- confirm and fix.
// NOTE(review): numeric_limits<double>::min() is the smallest positive
// normalized double, not the most negative value.
271void
273{
274 m_smallestMz = std::numeric_limits<double>::min();
275 m_greatestMz = std::numeric_limits<double>::min();
277
278 // Special case for this member datum
280
281 m_applyMzShift = false;
282 m_mzShift = 0;
284}
285
286
287bool
289{
290 int errors = 0;
291
293 {
294 // qDebug() << "m_smallestMz:" << m_smallestMz;
295 // qDebug() << "smallest is max:" << (m_smallestMz ==
296 // std::numeric_limits<double>::max());
297
298 errors += (m_smallestMz == std::numeric_limits<double>::max() ? 1 : 0);
299
300 // qDebug() << "m_greatestMz:" << m_greatestMz;
301 // qDebug() << "greatest is min:" << (m_greatestMz ==
302 // std::numeric_limits<double>::min());
303 errors += (m_greatestMz == std::numeric_limits<double>::min() ? 1 : 0);
304
305 // if(mp_precision != nullptr)
306 // qDebug() << mp_precision->toString();
307
308 errors += (mp_precision == nullptr ? 1 : 0);
309 }
310
311 if(errors)
312 {
313 qDebug()
314 << "The m/z integration parameters are not valid or do not apply...";
315 }
316
317 return !errors;
318}
319
320
321bool
323{
324 return (m_smallestMz != std::numeric_limits<double>::max()) &&
325 (m_greatestMz != std::numeric_limits<double>::min());
326}
327
328
329std::vector<double>
331{
332
333 // qDebug();
334
335 std::vector<double> bins;
336
338 {
339 // If no binning is to be performed, fine.
340 return bins;
341 }
343 {
344 // Use only data in the MzIntegrationParams member data.
345 return createArbitraryBins();
346 }
348 {
349 // qDebug();
350
351 qFatal("Programming error.");
352 }
353
354 return bins;
355}
356
357
358std::vector<double>
360{
361
362 // qDebug();
363
364 std::vector<double> bins;
365
367 {
368 // If no binning is to be performed, fine.
369 return bins;
370 }
372 {
373 // Use only data in the MzIntegrationParams member data.
374 return createArbitraryBins();
375 }
377 {
378 // qDebug();
379
380 // Use the first spectrum to perform the data-based bins
381
382 return createDataBasedBins(mass_spectrum_csp);
383 }
384
385 return bins;
386}
387
388
// Compute the list of m/z bin values for the precision-driven ("arbitrary")
// binning and return it as a vector.
//
// As far as the visible code shows: the first bin is min_mz rounded to
// m_decimalPlaces decimals; each following bin is the previous bin plus
// mp_precision->delta(previous bin), rounded the same way; bins are appended
// as long as the previous bin is <= last_mz (max_mz rounded).
//
// NOTE(review): min_mz and max_mz are defined on lines not visible here;
// presumably they are taken from the configured m/z range -- confirm.
// NOTE(review): the statements that assign m_decimalPlaces are not visible
// here either; the surrounding comments indicate that this function may modify
// that member -- confirm.
389std::vector<double>
391{
392
393 // qDebug();
394
395 // Now starts the tricky stuff. Depending on how the binning has been
396 // configured, we need to take diverse actions.
397
398 // qDebug() << "Bin specification:" << mp_precision->toString();
399
402
403 // qDebug() << QString::asprintf("min_mz: %.6f\n", min_mz)
404 //<< QString::asprintf("max_mz: %.6f\n", max_mz);
405
// NOTE(review): mp_precision is dereferenced here without a null check.
406 pappso::pappso_double binSize = mp_precision->delta(min_mz);
407
408 // qDebug() << QString::asprintf(
409 //"binSize is the precision delta for min_mz: %.6f\n", binSize);
410
411 // Only compute the decimal places if they were not configured already.
412 if(m_decimalPlaces == -1)
413 {
414 // qDebug() << "Now checking how many decimal places are needed.";
415
416 // We want as many decimal places as there are 0s between the integral
417 // part of the double and the first non-0 cipher. For example, if
418 // binSize is 0.004, zero decimals is 2 and m_decimalPlaces is set to 3,
419 // because we want decimals up to 4 included.
420
422
423 // qDebug() << "With binSize" << binSize
424 //<< " m_decimalPlaces was computed to be:" << m_decimalPlaces;
425 }
426
427 // Now that we have defined the value of m_decimalPlaces, let's use that
428 // value.
429
// Rounding idiom used throughout: ceil(v * 10^d - 0.49) / 10^d keeps d
// decimals and rounds up as soon as the fractional part of the scaled value
// exceeds 0.49 (not 0.5).
430 double first_mz = ceil((min_mz * std::pow(10, m_decimalPlaces)) - 0.49) /
431 pow(10, m_decimalPlaces);
432 double last_mz =
433 ceil((max_mz * pow(10, m_decimalPlaces)) - 0.49) / pow(10, m_decimalPlaces);
434
435 // qDebug() << "After having accounted for the decimals, new min/max values:"
436 //<< QString::asprintf("Very first data point: %.6f\n", first_mz)
437 //<< QString::asprintf("Very last data point to reach: %.6f\n",
438 // last_mz);
439
440 // Instantiate the vector of mz double_s that we'll feed with the bins.
441
442 std::vector<pappso::pappso_double> bins;
443
444 // Store that very first value for later use in the loop.
445 // The bins are nothing more than:
446 //
447 // 1. The first mz (that is the smallest mz value found in all the spectra);
448 // 2. A sequence of mz values corresponding to that first mz value
449 // incremented by the bin size.
450
451 // Seed the root of the bin vector with the first mz value rounded above as
452 // requested.
453 pappso::pappso_double previous_mz_bin = first_mz;
454
455 bins.push_back(previous_mz_bin);
456
457 // Now continue adding mz values until we have reached the end of the
458 // spectrum, that is the max_mz value, as converted using the decimals to
459 // last_mz.
460
461 // debugCount value used below for debugging purposes.
462 // int debugCount = 0;
463
// NOTE(review): the test bears on the previously stored bin, so the last
// value pushed by this loop is the first one lying beyond last_mz.
464 while(previous_mz_bin <= last_mz)
465 {
466
467 // qDebug() << "Now starting the bin creation loop.";
468
469 // Calculate dynamically the precision delta according to the current mz
470 // value.
471
472 // double precision_delta = mp_precision->delta(previous_mz_bin);
473 // qDebug() << "precision_delta: " << precision_delta;
474
475 double current_mz =
476 previous_mz_bin + mp_precision->delta(previous_mz_bin);
477
478 // qDebug() << QString::asprintf(
479 //"previous_mzBin: %.6f and current_mz: %.6f\n",
480 // previous_mz_bin,
481 // current_mz);
482
483 // Now apply on the obtained mz value the decimals that were either set
484 // or computed earlier.
485
486 double current_rounded_mz =
487 ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
488 pow(10, m_decimalPlaces);
489
490 // qDebug() << QString::asprintf(
491 //"current_mz: %.6f and current_rounded_mz: %.6f and previous_mzBin "
492 //": % .6f\n ",
493 // current_mz,
494 // current_rounded_mz,
495 // previous_mz_bin);
496
497 // If rounding makes the new value identical to the previous one, then
498 // that means that we need to decrease roughness.
499
// NOTE(review): the rounding is retried only once. If the re-rounded value
// is still equal to previous_mz_bin the loop does not advance; confirm that
// the adjustment made in this branch (not visible here) guarantees progress.
500 if(current_rounded_mz == previous_mz_bin)
501 {
503
504 current_rounded_mz =
505 ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
506 pow(10, m_decimalPlaces);
507
508 // qDebug().noquote()
509 //<< "Had to increment decimal places by one while creating the bins "
510 //"in BinningType::ARBITRARY mode..";
511 }
512
513 bins.push_back(current_rounded_mz);
514
515 // Use the local_mz value for the storage of the previous mz bin.
516 previous_mz_bin = current_rounded_mz;
517 }
518
519
// Disabled debugging aid: dumps the computed bins to a time-stamped file.
520#if 0
521
522 QString fileName = "/tmp/massSpecArbitraryBins.txt-at-" +
523 QDateTime::currentDateTime().toString("yyyyMMdd-HH-mm-ss");
524
525 qDebug() << "Writing the list of bins setup in the "
526 "mass spectrum in file "
527 << fileName;
528
529 QFile file(fileName);
530 file.open(QIODevice::WriteOnly);
531
532 QTextStream fileStream(&file);
533
534 for(auto &&bin : bins)
535 fileStream << QString("%1\n").arg(bin, 0, 'f', 10);
536
537 fileStream.flush();
538 file.close();
539
540#endif
541
542 // qDebug() << "Prepared bins with " << bins.size() << "elements."
543 //<< "starting with mz" << bins.front() << "ending with mz"
544 //<< bins.back();
545
546 return bins;
547}
548
549
// Compute m/z bin values from the data points of mass_spectrum_csp and return
// them as a vector.
//
// As far as the visible code shows: the spectrum is copied and sorted by m/z;
// starting from min_mz, each new bin start is the previous bin start plus the
// m/z distance between two consecutive data points. When m_decimalPlaces is
// not -1, every intermediate value is rounded to that number of decimals.
// An empty vector is returned when the spectrum holds fewer than two points.
//
// NOTE(review): min_mz is defined on a line not visible here; presumably it
// is the m/z of the first data point of the sorted copy -- confirm.
// NOTE(review): mass_spectrum_csp is dereferenced without a null check.
550std::vector<double>
552 pappso::MassSpectrumCstSPtr mass_spectrum_csp)
553{
554 // qDebug();
555
556 // The bins in *this mass spectrum must be calculated starting from the
557 // data in the mass_spectrum_csp parameter.
558
559 // Instantiate the vector of mz double_s that we'll feed with the bins.
560
561 std::vector<pappso::pappso_double> bins;
562
563 if(mass_spectrum_csp->size() < 2)
564 return bins;
565
566 // Make sure the spectrum is sorted, as this function takes for granted
567 // that the DataPoint instances are sorted in ascending x (== mz) value
568 // order.
569 pappso::MassSpectrum local_mass_spectrum = *mass_spectrum_csp;
570 local_mass_spectrum.sortMz();
571
573
574 // qDebug() << "The min_mz:" << min_mz;
575
// Same rounding idiom as for the other bins: keep m_decimalPlaces decimals,
// rounding up once the scaled fractional part exceeds 0.49.
576 if(m_decimalPlaces != -1)
577 min_mz = ceil((min_mz * pow(10, m_decimalPlaces)) - 0.49) /
578 pow(10, m_decimalPlaces);
579
580
581 // Two values for the definition of a MassSpectrumBin.
582
583 // The first value of the mz range that defines the bin. This value is part
584 // of the bin.
585 pappso::pappso_double start_mz_in = min_mz;
586
587 // The second value of the mz range that defines the bin. This value is
588 // *not* part of the bin.
// It is left uninitialized here and assigned in the loop before any read.
589 pappso::pappso_double end_mz_out;
590
591 std::vector<pappso::DataPoint>::const_iterator it =
592 local_mass_spectrum.begin();
593
594 pappso::pappso_double prev_mz = it->x;
595
596 if(m_decimalPlaces != -1)
597 prev_mz = ceil((prev_mz * pow(10, m_decimalPlaces)) - 0.49) /
598 pow(10, m_decimalPlaces);
599
600 ++it;
601
// NOTE(review): one value is pushed per pair of consecutive data points; the
// end of the last bin (held in start_mz_in when the loop exits) is never
// pushed -- confirm this is intended.
602 while(it != local_mass_spectrum.end())
603 {
604 pappso::pappso_double next_mz = it->x;
605
606 if(m_decimalPlaces != -1)
607 next_mz = ceil((next_mz * pow(10, m_decimalPlaces)) - 0.49) /
608 pow(10, m_decimalPlaces);
609
610 pappso::pappso_double step = next_mz - prev_mz;
611 end_mz_out = start_mz_in + step;
612
613 if(m_decimalPlaces != -1)
614 end_mz_out = ceil((end_mz_out * pow(10, m_decimalPlaces)) - 0.49) /
615 pow(10, m_decimalPlaces);
616
617 // The data point that is crafted has a 0 y-value. The binning must
618 // indeed not create artificial intensity data.
619
620 // qDebug() << "Pushing back bin:" << start_mz_in << end_mz_out;
621
622 bins.push_back(start_mz_in);
623
624 // Prepare next bin
625 start_mz_in = end_mz_out;
626
627 // Update prev_mz to be the current one for next iteration.
628 prev_mz = next_mz;
629
630 // Now go to the next DataPoint instance.
631 ++it;
632 }
633
// NOTE(review): this disabled debugging block refers to m_bins and to
// startMzIn / endMzOut members, none of which appear in the code above; it
// looks stale and would need updating before being re-enabled.
634#if 0
635
636 QString fileName = "/tmp/massSpecDataBasedBins.txt";
637
638 qDebug() << "Writing the list of bins setup in the "
639 "mass spectrum in file "
640 << fileName;
641
642 QFile file(fileName);
643 file.open(QIODevice::WriteOnly);
644
645 QTextStream fileStream(&file);
646
647 for(auto &&bin : m_bins)
648 fileStream << QString("[%1-%2]\n")
649 .arg(bin.startMzIn, 0, 'f', 10)
650 .arg(bin.endMzOut, 0, 'f', 10);
651
652 fileStream.flush();
653 file.close();
654
655 qDebug() << "elements."
656 << "starting with mz" << m_bins.front().startMzIn << "ending with mz"
657 << m_bins.back().endMzOut;
658
659#endif
660
661 return bins;
662}
663
664
665QString
666MzIntegrationParams::toString(int offset, const QString &spacer) const
667{
668 QString lead;
669
670 for(int iter = 0; iter < offset; ++iter)
671 lead += spacer;
672
673 QString text = lead;
674 text += "m/z integration parameters:\n";
675
676 text += lead;
677 text += spacer;
678 if(m_smallestMz != std::numeric_limits<double>::max())
679 text.append(
680 QString::asprintf("Smallest (first) m/z: %.6f\n", m_smallestMz));
681
682 text += lead;
683 text += spacer;
684 if(m_greatestMz != std::numeric_limits<double>::min())
685 text.append(QString::asprintf("Greatest (last) m/z: %.6f\n", m_greatestMz));
686
687 text += lead;
688 text += spacer;
689 text.append(QString("Decimal places: %1\n").arg(m_decimalPlaces));
690
691 std::map<BinningType, QString>::iterator it;
692 it = binningTypeMap.find(m_binningType);
693
694 if(it == binningTypeMap.end())
695 qFatal("Programming error.");
696
697 text += lead;
698 text += spacer;
699 text.append(QString("Binning type: %1\n").arg(it->second.toLatin1().data()));
700
701 // Only provide the details relative to the ARBITRARY binning type.
702
704 {
705 text += lead;
706 text += spacer;
707 text += spacer;
708 text.append(QString("Bin nominal size: %1\n")
709 .arg(mp_precision->getNominal(), 0, 'f', 6));
710
711 text += lead;
712 text += spacer;
713 text += spacer;
714 text.append(QString("Bin size: %2\n")
715 .arg(mp_precision->toString().toLatin1().data()));
716 }
717
718 // Now other data that are independent of the bin settings.
719
720 text += lead;
721 text += spacer;
722 text +=
723 QString("Apply m/z shift: %1\n").arg(m_applyMzShift ? "true" : "false");
724
726 {
727 text += lead;
728 text += spacer;
729 text += spacer;
730 text += QString("m/z shift: %1").arg(m_mzShift, 0, 'f', 6);
731 }
732
733 text += lead;
734 text += spacer;
735 text += QString("Remove 0-val data points: %1\n")
736 .arg(m_removeZeroValDataPoints ? "true" : "false");
737
738 return text;
739}
740
741} // namespace pappso
742
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
The MzIntegrationParams class provides the parameters defining how m/z !
Q_INVOKABLE BinningType getBinningType() const
Q_INVOKABLE int getDecimalPlaces() const
pappso::pappso_double getSmallestMz() const
pappso::pappso_double m_smallestMz
MzIntegrationParams & operator=(const MzIntegrationParams &other)
Q_INVOKABLE pappso::pappso_double getGreatestMz() const
pappso::pappso_double m_greatestMz
Q_INVOKABLE pappso::PrecisionPtr getPrecision() const
Q_INVOKABLE bool isApplyMzShift() const
std::vector< double > createArbitraryBins()
Q_INVOKABLE void setPrecision(pappso::PrecisionPtr precisionPtr)
Q_INVOKABLE double getMzShift() const
Q_INVOKABLE void updateSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void updateGreatestMz(pappso::pappso_double value)
Q_INVOKABLE bool isRemoveZeroValDataPoints() const
Q_INVOKABLE bool isValid() const
Q_INVOKABLE bool hasValidMzRange() const
Q_INVOKABLE QString toString(int offset=0, const QString &spacer=QString()) const
pappso::PrecisionPtr mp_precision
Q_INVOKABLE void setSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void setBinningType(BinningType binningType)
Q_INVOKABLE void reset()
Reset the instance to default values.
std::vector< double > createDataBasedBins(pappso::MassSpectrumCstSPtr massSpectrum)
Q_INVOKABLE void setApplyMzShift(bool applyMzShift)
Q_INVOKABLE void setDecimalPlaces(int decimal_places)
Q_INVOKABLE std::vector< pappso::pappso_double > createBins()
Q_INVOKABLE void setMzShift(double value)
Q_INVOKABLE void setRemoveZeroValDataPoints(bool removeOrNot=true)
Q_INVOKABLE void setGreatestMz(pappso::pappso_double value)
pappso::pappso_double m_mzShift
virtual QString toString() const =0
virtual pappso_double getNominal() const final
Definition: precision.cpp:65
virtual pappso_double delta(pappso_double value) const =0
static PrecisionPtr getPpmInstance(pappso_double value)
get a ppm precision pointer
Definition: precision.cpp:150
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
static int zeroDecimalsInValue(pappso_double value)
0.11 would return 0 (no empty decimal) 2.001 would return 2 1000.0001254 would return 3
Definition: utils.cpp:82
int mzIntegrationParamsMetaTypeId
int mzIntegrationParamsPtrMetaTypeId
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
BinningType
Type of binning when performing integrations to a mass spectrum.
@ DATA_BASED
binning based on mass spectral data
@ ARBITRARY
binning based on arbitrary bin size value
@ NONE
< no binning
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::map< BinningType, QString > binningTypeMap
Map relating the BinningType to a textual representation.