ProteoWizard
PrecursorRecalculatorDefaultTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: PrecursorRecalculatorDefaultTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2008 Spielberg Family Center for Applied Proteomics
8 // Cedars Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
28 #include "boost/filesystem/path.hpp"
29 #include "boost/filesystem/fstream.hpp"
31 
32 using namespace pwiz::analysis;
33 using namespace pwiz::msdata;
34 using namespace pwiz::util;
35 using namespace pwiz::data;
36 namespace bfs = boost::filesystem;
37 
38 
39 ostream* os_ = 0;
40 
41 
42 double testData_[] =
43 {
44  818.0578, 0.0000,
45  818.0618, 0.0000,
46  818.0659, 0.0000,
47  818.0699, 0.0000,
48  818.0740, 554.0963,
49  818.0781, 676.5923,
50  818.0821, 560.7537,
51  818.0862, 0.0000,
52  818.0902, 0.0000,
53  818.0942, 0.0000,
54  818.0983, 0.0000,
55  818.1105, 0.0000,
56  818.1145, 0.0000,
57  818.1186, 0.0000,
58  818.1226, 0.0000,
59  818.1267, 391.2166,
60  818.1307, 697.9452,
61  818.1348, 593.9573,
62  818.1389, 0.0000,
63  818.1429, 0.0000,
64  818.1470, 272.1141,
65  818.1510, 693.6737,
66  818.1550, 727.4882,
67  818.1591, 411.9992,
68  818.1631, 0.0000,
69  818.1672, 0.0000,
70  818.1713, 0.0000,
71  818.1753, 0.0000,
72  818.3740, 0.0000,
73  818.3780, 0.0000,
74  818.3821, 0.0000,
75  818.3861, 0.0000,
76  818.3902, 220.8158,
77  818.3942, 649.2535,
78  818.3983, 1322.3580,
79  818.4023, 2346.6816,
80  818.4064, 5577.4443,
81  818.4105, 15628.4590,
82  818.4145, 28139.2852,
83  818.4186, 34538.0156,
84  818.4226, 29967.1211,
85  818.4267, 17854.7773,
86  818.4307, 6258.7852,
87  818.4348, 336.7964,
88  818.4388, 0.0000,
89  818.4429, 0.0000,
90  818.4470, 0.0000,
91  818.4510, 0.0000,
92  818.8811, 0.0000,
93  818.8852, 0.0000,
94  818.8892, 0.0000,
95  818.8933, 0.0000,
96  818.8973, 493.9565,
97  818.9014, 1365.4312,
98  818.9055, 2507.0815,
99  818.9095, 6813.2627,
100  818.9136, 13756.5684,
101  818.9177, 18748.5176,
102  818.9217, 18208.9883,
103  818.9258, 12877.9766,
104  818.9298, 6632.2642,
105  818.9339, 2455.7969,
106  818.9380, 518.3702,
107  818.9420, 0.0000,
108  818.9461, 0.0000,
109  818.9501, 0.0000,
110  818.9542, 0.0000,
111  818.9583, 0.0000,
112  818.9623, 416.6718,
113  818.9664, 835.6812,
114  818.9705, 899.3900,
115  818.9745, 565.6674,
116  818.9786, 0.0000,
117  818.9826, 0.0000,
118  818.9867, 0.0000,
119  818.9907, 0.0000,
120  819.3401, 0.0000,
121  819.3442, 0.0000,
122  819.3483, 0.0000,
123  819.3524, 0.0000,
124  819.3564, 537.5367,
125  819.3605, 666.3043,
126  819.3645, 707.9114,
127  819.3686, 560.4056,
128  819.3727, 0.0000,
129  819.3767, 0.0000,
130  819.3808, 0.0000,
131  819.3848, 0.0000,
132  819.3889, 0.0000,
133  819.3930, 0.0000,
134  819.3970, 0.0000,
135  819.4011, 248.0490,
136  819.4052, 983.9253,
137  819.4092, 2492.4019,
138  819.4133, 4937.9619,
139  819.4174, 7837.6245,
140  819.4214, 9702.0254,
141  819.4255, 9001.9619,
142  819.4296, 6028.9702,
143  819.4337, 2715.7598,
144  819.4377, 881.8906,
145  819.4418, 979.8033,
146  819.4458, 1142.8175,
147  819.4499, 901.4358,
148  819.4540, 509.0410,
149  819.4580, 0.0000,
150  819.4621, 0.0000,
151  819.4661, 0.0000,
152  819.4702, 0.0000,
153  819.8810, 0.0000,
154  819.8851, 0.0000,
155  819.8892, 0.0000,
156  819.8932, 0.0000,
157  819.8973, 38.7442,
158  819.9014, 719.8252,
159  819.9055, 1409.7166,
160  819.9095, 1759.1530,
161  819.9136, 1186.1797,
162  819.9177, 834.6477,
163  819.9218, 2120.9097,
164  819.9258, 2723.4282,
165  819.9299, 2148.7354,
166  819.9340, 951.6946,
167  819.9380, 0.0000,
168  819.9421, 0.0000,
169  819.9462, 0.0000,
170  819.9503, 0.0000,
171  819.9543, 0.0000,
172  820.1131, 0.0000,
173  820.1172, 0.0000,
174  820.1212, 0.0000,
175  820.1253, 0.0000,
176  820.1294, 283.9149,
177  820.1335, 685.0024,
178  820.1375, 841.5573,
179  820.1416, 831.9690,
180  820.1457, 724.9828,
181  820.1498, 478.1599,
182  820.1538, 0.0000,
183  820.1579, 0.0000,
184  820.1620, 0.0000,
185  820.1660, 0.0000,
186  820.3942, 0.0000,
187  820.3983, 0.0000,
188  820.4023, 0.0000,
189  820.4064, 0.0000,
190  820.4105, 0.0000,
191  820.4146, 733.8157,
192  820.4186, 1578.8794,
193  820.4227, 1832.4481,
194  820.4268, 1322.1443,
195  820.4308, 489.9802,
196  820.4349, 0.0000,
197  820.4390, 0.0000,
198  820.4431, 0.0000,
199  820.4471, 259.0050,
200  820.4512, 654.6262,
201  820.4553, 731.2765,
202  820.4594, 517.5179,
203  820.4634, 0.0000,
204  820.4675, 0.0000,
205  820.4716, 0.0000,
206  820.4756, 0.0000,
207  820.5205, 0.0000,
208  820.5246, 0.0000,
209  820.5287, 0.0000,
210  820.5327, 0.0000,
211  820.5368, 618.2869,
212  820.5409, 684.1355,
213  820.5450, 464.5241,
214  820.5491, 0.0000,
215  820.5531, 0.0000,
216  820.5572, 0.0000,
217  820.5613, 0.0000,
218  820.5654, 0.0000,
219  820.5694, 0.0000,
220  820.5735, 302.8770,
221  820.5776, 748.6038,
222  820.5817, 961.3633,
223  820.5858, 820.3262,
224  820.5898, 413.4973,
225  820.5939, 0.0000,
226  820.5980, 0.0000,
227  820.6021, 0.0000,
228  820.6061, 0.0000,
229  820.6102, 0.0000,
230  820.6143, 354.7731,
231  820.6183, 890.8882,
232  820.6224, 1160.5521,
233  820.6265, 1128.5698,
234  820.6306, 893.9106,
235  820.6346, 579.9231,
236  820.6387, 0.0000,
237  820.6428, 0.0000,
238  820.6469, 0.0000,
239  820.6509, 0.0000,
240  820.8589, 0.0000,
241  820.8630, 0.0000,
242  820.8671, 0.0000,
243  820.8712, 0.0000,
244  820.8753, 567.8625,
245  820.8793, 953.4827,
246  820.8834, 1072.7717,
247  820.8875, 1019.1711,
248  820.8916, 946.2395,
249  820.8957, 748.0505,
250  820.8998, 448.6019,
251  820.9039, 0.0000,
252  820.9079, 0.0000,
253  820.9120, 0.0000,
254  820.9161, 0.0000,
255  821.3365, 0.0000,
256  821.3406, 0.0000,
257  821.3447, 0.0000,
258  821.3488, 0.0000,
259  821.3529, 551.2963,
260  821.3569, 717.1707,
261  821.3610, 837.5309,
262  821.3651, 841.7739,
263  821.3692, 261.5813,
264  821.3733, 498.2640,
265  821.3774, 2032.2089,
266  821.3815, 2452.4067,
267  821.3856, 1783.2299,
268  821.3896, 696.4254,
269  821.3937, 955.2690,
270  821.3978, 3954.5977,
271  821.4019, 19850.8086,
272  821.4060, 46906.4688,
273  821.4100, 68569.3750,
274  821.4141, 68448.7812,
275  821.4182, 46811.6289,
276  821.4223, 19901.8672,
277  821.4264, 3090.5479,
278  821.4305, 862.4839,
279  821.4346, 326.3895,
280  821.4387, 0.0000,
281  821.4427, 0.0000,
282  821.4468, 0.0000,
283  821.4509, 0.0000,
284  821.8556, 0.0000,
285  821.8597, 0.0000,
286  821.8638, 0.0000,
287  821.8679, 0.0000,
288  821.8719, 633.9686,
289  821.8760, 1388.3333,
290  821.8801, 1965.9994,
291  821.8842, 1568.3851,
292  821.8883, 617.3872,
293  821.8924, 471.6464,
294  821.8965, 2934.9033,
295  821.9006, 6675.8296,
296  821.9047, 23122.4727,
297  821.9088, 47305.5195,
298  821.9128, 62059.1055,
299  821.9169, 55725.9336,
300  821.9210, 33587.5078,
301  821.9251, 11589.8770,
302  821.9292, 368.7498,
303  821.9333, 725.5962,
304  821.9374, 80.9717,
305  821.9415, 0.0000,
306  821.9456, 0.0000,
307  821.9496, 0.0000,
308  821.9537, 0.0000,
309  822.3548, 0.0000,
310  822.3589, 0.0000,
311  822.3630, 0.0000,
312  822.3671, 0.0000,
313  822.3712, 106.4319,
314  822.3752, 698.2700,
315  822.3793, 1279.7435,
316  822.3834, 1498.7074,
317  822.3875, 1715.3507,
318  822.3916, 2368.6270,
319  822.3957, 2623.0996,
320  822.3998, 570.4650,
321  822.4039, 5261.7271,
322  822.4080, 15413.5098,
323  822.4121, 23855.4492,
324  822.4162, 25214.1484,
325  822.4203, 19019.5293,
326  822.4244, 9904.5566,
327  822.4285, 3034.5713,
328  822.4326, 13.8116,
329  822.4366, 0.0000,
330  822.4407, 0.0000,
331  822.4449, 0.0000,
332  822.4490, 0.0000,
333  822.8710, 0.0000,
334  822.8751, 0.0000,
335  822.8792, 0.0000,
336  822.8833, 0.0000,
337  822.8874, 635.9196,
338  822.8915, 1131.2902,
339  822.8956, 1693.5773,
340  822.8997, 1612.8446,
341  822.9038, 1345.5366,
342  822.9079, 3657.9766,
343  822.9120, 6275.4512,
344  822.9161, 7365.7505,
345  822.9202, 6641.2046,
346  822.9243, 4600.4551,
347  822.9284, 2155.3687,
348  822.9325, 336.5125,
349  822.9366, 0.0000,
350  822.9407, 0.0000,
351  822.9448, 0.0000,
352  822.9489, 0.0000,
353  823.3468, 0.0000,
354  823.3509, 0.0000,
355  823.3550, 0.0000,
356  823.3591, 0.0000,
357  823.3632, 506.6892,
358  823.3673, 877.7867,
359  823.3714, 1072.1282,
360  823.3755, 1128.3158,
361  823.3796, 1120.0167,
362  823.3837, 939.9150,
363  823.3878, 394.1900,
364  823.3920, 113.9174,
365  823.3961, 787.8625,
366  823.4001, 978.4752,
367  823.4042, 616.5432,
368  823.4084, 0.0000,
369  823.4125, 0.0000,
370  823.4166, 0.0000,
371  823.4207, 269.5316,
372  823.4248, 978.9325,
373  823.4289, 1613.0895,
374  823.4330, 1762.3575,
375  823.4371, 1326.9281,
376  823.4412, 624.3387,
377  823.4453, 0.0000,
378  823.4494, 0.0000,
379  823.4536, 0.0000,
380  823.4576, 0.0000,
381 }; // testData_
382 
383 
384 const size_t testDataSize_ = sizeof(testData_)/sizeof(double);
387 
388 
389 void test()
390 {
391  if (os_) *os_ << "test()\n" << flush;
392 
393  // instantiate PeakFamilyDetector
394 
395  PeakFamilyDetectorFT::Config pfdftConfig;
396  pfdftConfig.cp = CalibrationParameters::thermo_FT();
397  shared_ptr<PeakFamilyDetector> pfd(new PeakFamilyDetectorFT(pfdftConfig));
398 
399  // instantiate PrecursorRecalculatorDefault
400 
402  config.peakFamilyDetector = pfd;
403  config.mzLeftWidth = 1;
404  config.mzRightWidth = 2.5;
405  PrecursorRecalculatorDefault pr(config);
406 
407  // recalculate
408 
409  PrecursorRecalculator::PrecursorInfo initialEstimate;
410  initialEstimate.mz = 821.92;
411  vector<PrecursorRecalculator::PrecursorInfo> result;
412  pr.recalculate(testDataBegin_, testDataEnd_, initialEstimate, result);
413 
414  // validate result
415 
416  unit_assert(result.size() == 1);
417  unit_assert_equal(result[0].mz, 821.41, 1e-2);
418 }
419 
420 
421 void test2()
422 {
423  if (os_) *os_ << "test2()\n" << flush;
424 
425  // instantiate PeakFamilyDetector
426 
427  PeakFamilyDetectorFT::Config pfdftConfig;
428  pfdftConfig.cp = CalibrationParameters::thermo_FT();
429  shared_ptr<PeakFamilyDetector> pfd(new PeakFamilyDetectorFT(pfdftConfig));
430 
431  // instantiate PrecursorRecalculatorDefault
432 
434  config.peakFamilyDetector = pfd;
435  config.mzLeftWidth = 4;
436  config.mzRightWidth = 2.5;
437  PrecursorRecalculatorDefault pr(config);
438 
439  // recalculate
440 
441  PrecursorRecalculator::PrecursorInfo initialEstimate;
442  initialEstimate.mz = 821.92;
443  vector<PrecursorRecalculator::PrecursorInfo> result;
444  pr.recalculate(testDataBegin_, testDataEnd_, initialEstimate, result);
445 
446  // validate result
447 
448  unit_assert(result.size() == 2);
449  unit_assert_equal(result[0].mz, 821.41, 1e-2);
450  unit_assert_equal(result[1].mz, 818.42, 1e-2);
451 }
452 
453 
454 vector<MZIntensityPair> readData(const bfs::path& filename)
455 {
456  // data stored as 32-bit big-endian zlib m/z-intensity pairs (mzXML with zlib)
457 
458  bfs::ifstream is(filename);
459  if (!is) throw runtime_error(("[PrecursorRecalculatorDefaultTest::readData()] Unable to open file " + filename.string()).c_str());
460  string encoded;
461  is >> encoded;
462 
463  BinaryDataEncoder::Config bdeConfig;
467 
468  BinaryDataEncoder encoder(bdeConfig);
469  vector<double> data;
470  encoder.decode(encoded, data);
471 
472  unit_assert(!data.empty() && data.size()%2 == 0);
473  vector<MZIntensityPair> result(data.size()/2);
474  copy(data.begin(), data.end(), reinterpret_cast<double*>(&result[0]));
475  return result;
476 }
477 
478 
479 shared_ptr<PrecursorRecalculatorDefault> createPrecursorRecalculator_msprefix()
480 {
481  // instantiate PeakFamilyDetector
482 
483  PeakFamilyDetectorFT::Config pfdftConfig;
484  pfdftConfig.cp = CalibrationParameters::thermo_FT();
485  shared_ptr<PeakFamilyDetector> pfd(new PeakFamilyDetectorFT(pfdftConfig));
486 
487  // instantiate PrecursorRecalculatorDefault
488 
490  config.peakFamilyDetector = pfd;
491  config.mzLeftWidth = 3;
492  config.mzRightWidth = 1.6;
493  return shared_ptr<PrecursorRecalculatorDefault>(new PrecursorRecalculatorDefault(config));
494 }
495 
496 
497 struct TestInfo
498 {
500  double mzTrue;
502 
503  TestInfo(double _mzInitialEstimate,
504  double _mzTrue,
505  int _chargeTrue)
506  : mzInitialEstimate(_mzInitialEstimate),
507  mzTrue(_mzTrue),
508  chargeTrue(_chargeTrue)
509  {}
510 };
511 
512 
514  const MZIntensityPair* end,
516  const TestInfo& testInfo)
517 {
518  // recalculate
519 
520  PrecursorRecalculator::PrecursorInfo initialEstimate;
521  initialEstimate.mz = testInfo.mzInitialEstimate;
522 
523  vector<PrecursorRecalculator::PrecursorInfo> result;
524  pr.recalculate(begin, end, initialEstimate, result);
525 
526  // validate result
527 
528  if (os_)
529  for (vector<PrecursorRecalculator::PrecursorInfo>::const_iterator it=result.begin(), end=result.end(); it!=end; ++it)
530  *os_ << " " << it->mz << " " << it->charge << endl;
531 
532  unit_assert(result.size() >= 1);
533  unit_assert_equal(result[0].mz, testInfo.mzTrue, 1e-2);
534  unit_assert(result[0].charge == testInfo.chargeTrue);
535 }
536 
537 
538 void test5peptide(const bfs::path& datadir)
539 {
540  if (os_) *os_ << "test5peptide()\n" << flush;
541 
542  vector<MZIntensityPair> data = readData(datadir / "5peptide.b64");
543  unit_assert(data.size() == 19914);
544 
545  shared_ptr<PrecursorRecalculatorDefault> pr = createPrecursorRecalculator_msprefix();
546 
547  const MZIntensityPair* begin = &data[0];
548  const MZIntensityPair* end = begin + data.size();
549 
550  validateRecalculation(begin, end, *pr, TestInfo(810.79, 810.42, 2));
551  validateRecalculation(begin, end, *pr, TestInfo(837.34, 836.96, 2));
552  validateRecalculation(begin, end, *pr, TestInfo(725.36, 724.91, 2));
553  validateRecalculation(begin, end, *pr, TestInfo(558.87, 558.31, 3));
554  validateRecalculation(begin, end, *pr, TestInfo(812.33, 810.42, 2));
555  validateRecalculation(begin, end, *pr, TestInfo(810.75, 810.42, 2));
556  validateRecalculation(begin, end, *pr, TestInfo(837.96, 836.96, 2));
557  validateRecalculation(begin, end, *pr, TestInfo(644.06, 643.37, 2));
558  validateRecalculation(begin, end, *pr, TestInfo(725.68, 724.91, 2));
559  validateRecalculation(begin, end, *pr, TestInfo(559.19, 558.31, 3));
560  validateRecalculation(begin, end, *pr, TestInfo(811.41, 810.42, 2));
561  validateRecalculation(begin, end, *pr, TestInfo(674.64, 674.37, 2));
562  validateRecalculation(begin, end, *pr, TestInfo(882.45, 882.47, 1));
563 }
564 
565 
566 void runSpecialTest(const bfs::path& filename, size_t pairCount, const TestInfo& testInfo)
567 {
568  if (os_) *os_ << "runSpecialTest: " << filename << " " << testInfo.mzInitialEstimate << " "
569  << testInfo.mzTrue << " " << testInfo.chargeTrue << endl;
570 
571  vector<MZIntensityPair> data = readData(filename);
572  unit_assert(data.size() == pairCount);
573  shared_ptr<PrecursorRecalculatorDefault> pr = createPrecursorRecalculator_msprefix();
574  validateRecalculation(&*data.begin(), &*data.begin()+data.size(), *pr, testInfo);
575 }
576 
577 
578 void runTests(const bfs::path& datadir)
579 {
580  test();
581  test2();
582  test5peptide(datadir);
583 
584  runSpecialTest(datadir / "special_1a.b64", 12118, TestInfo(484.2727357, 484.28, 0));
585  runSpecialTest(datadir / "special_1b.b64", 17767, TestInfo(930.0000218, 929.99, 2));
586 
587  // noise floor calculation issue (due to big neighbor)
588  runSpecialTest(datadir / "special_2a.b64", 4802, TestInfo(705.0000091, 704.32, 2));
589 
590  // charge state determination (window must be > 1.5amu to the right)
591  runSpecialTest(datadir / "special_2b.b64", 8897, TestInfo(961.0000167, 960.9639, 2));
592 
593  // monoisotopic peak threshold must be lenient
594  runSpecialTest(datadir / "special_2c.b64", 7006, TestInfo(731.090919, 730.36, 3));
595  runSpecialTest(datadir / "special_2d.b64", 12512, TestInfo(730.3599854,730.36, 3));
596 
597  // charge state calculation issues due to small 1-neutron peak
598  runSpecialTest(datadir / "special_3a.b64", 5721, TestInfo(560.3636411, 560.28, 2));
599  runSpecialTest(datadir / "special_3b.b64", 5342, TestInfo(820.6363762, 820.47, 2));
600 
601  // charge state calculation issues due to small 1-neutron peak
602  runSpecialTest(datadir / "special_4a.b64", 4142, TestInfo(791.5454722, 791.37, 2));
603 
604  // charge state regression due to generous acceptance of charge 2 scores
605  runSpecialTest(datadir / "special_5a.b64", 12324, TestInfo(445.0000073, 445.12, 1));
606  runSpecialTest(datadir / "special_5a.b64", 12324, TestInfo(407.9090971, 408.31, 1));
607  runSpecialTest(datadir / "special_5a.b64", 12324, TestInfo(462.0000078, 462.14, 1));
608  runSpecialTest(datadir / "special_5a.b64", 12324, TestInfo(536.0909191, 536.16, 1));
609  runSpecialTest(datadir / "special_5a.b64", 12324, TestInfo(519.0909186, 519.14, 1));
610 
611  // lonely peaks
612  runSpecialTest(datadir / "special_6a.b64", 12358, TestInfo(1682.636408, 1683.39, 0));
613  runSpecialTest(datadir / "special_6b.b64", 12280, TestInfo(1565.636404, 1563.74, 0));
614  runSpecialTest(datadir / "special_6c.b64", 12245, TestInfo(1668.545498, 1667.55, 0));
615  runSpecialTest(datadir / "special_6d.b64", 12386, TestInfo(1851.545504, 1849.69, 0));
616  runSpecialTest(datadir / "special_6e.b64", 12221, TestInfo(1444.636401, 1442.54, 0));
617 }
618 
619 
620 int main(int argc, char* argv[])
621 {
622  TEST_PROLOG(argc, argv)
623 
624  try
625  {
626  bfs::path datadir = ".";
627 
628  for (int i=1; i<argc; i++)
629  {
630  if (!strcmp(argv[i],"-v"))
631  os_ = &cout;
632  else
633  // hack to allow running unit test from a different directory:
634  // Jamfile passes full path to specified input file.
635  // we want the path, so we can ignore filename
636  datadir = bfs::path(argv[i]).branch_path();
637  }
638 
639  runTests(datadir);
640 
641  }
642  catch (exception& e)
643  {
644  TEST_FAILED(e.what())
645  }
646  catch (...)
647  {
648  TEST_FAILED("Caught unknown exception.")
649  }
650 
652 }
653 
654