ProteoWizard
MinimumPepXML.hpp
Go to the documentation of this file.
1 //
2 // $Id: MinimumPepXML.hpp 1970 2010-05-11 20:23:31Z broter $
3 //
4 //
5 // Original author: Kate Hoff <katherine.hoff@proteowizard.org>
6 //
7 // Copyright 2009 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #ifndef _MINIMUMPEPXML_HPP_
25 #define _MINIMUMPEPXML_HPP_
26 
29 #include "boost/shared_ptr.hpp"
30 #include "boost/logic/tribool.hpp"
31 
32 #include <iostream>
33 #include <stdexcept>
34 
35 using namespace pwiz::minimxml;
36 using namespace pwiz::data::peakdata;
37 
38 namespace pwiz{
39 namespace data{
40 namespace pepxml{
41 
/// Takes an output stream by reference. Presumably selects where this
/// module writes its log output; the definition is not in this listing,
/// so confirm against the implementation file.
42 void setLogStream(std::ostream& os);
43 
45 {
    /// Enzyme cleavage rule record; the constructor and comparison
    /// operators below name the type Specificity. Default construction
    /// sets minSpace to 1 and leaves the string members empty.
46  Specificity() : minSpace(1) {}
47 
48  /// One or more 1-letter residue codes. Enzyme cleaves on the
49  /// sense side of the residue(s) listed in cut unless one of the
50  /// residues listed in no_cut is adjacent to the potential
51  /// cleavage site.
52  std::string cut;
53 
54  /// Zero or more 1-letter residue codes. Enzyme cleaves on the
55  /// sense side of the residue(s) listed in cut unless one of the
56  /// residues listed in no_cut is adjacent to the potential
57  /// cleavage site.
58  std::string noCut;
59 
60  /// Defines whether cleavage occurs on the C-terminal or
61  /// N-terminal side of the residue(s) listed in cut (values "C" or
62  /// "N")
63  std::string sense;
64 
65  /// minimum separation between adjacent cleavages. default 1.
66  size_t minSpace;
67 
    /// write() takes an XMLWriter and read() takes an input stream;
    /// both are only declared here, their definitions live elsewhere.
68  void write(XMLWriter& writer) const;
69  void read(std::istream& is);
70 
71  bool operator==(const Specificity& that) const;
72  bool operator!=(const Specificity& that) const;
73 
74 };
75 
77 {
    /// Sample enzyme record; the constructor and comparison operators
    /// below name the type SampleEnzyme. Default construction sets
    /// independent to boost::indeterminate.
78  SampleEnzyme() : independent(boost::indeterminate) {}
79 
80  /// Controlled code name for the enzyme that can be referred to by
81  /// applications.
82  std::string name;
83 
84  /// Free text to describe alternative names, special conditions,
85  /// etc.
86  std::string description;
87 
88  /// Semispecific means that at least one end of a peptide must
89  /// conform to the cleavage specificity, (unless the peptide was
90  /// at the terminus of the parent sequence). Nonspecific means
91  /// that neither end of a peptide must conform to the cleavage
92  /// specificity.
93  std::string fidelity;
94 
95  /// If there are multiple specificities and independent is true,
96  /// then a single peptide cannot exhibit one specificity at one
97  /// terminus and a different specificity at the other. If
98  /// independent is false, then a single peptide can exhibit mixed
99  /// specificities.
100  boost::tribool independent;
101 
    // NOTE(review): the listing numbering skips 102 here, so a member
    // declaration appears to be missing from this view.
103 
104  void write(XMLWriter& writer) const;
105  void read(std::istream& is);
106 
107  bool operator==(const SampleEnzyme& that) const;
108  bool operator!=(const SampleEnzyme& that) const;
109 
110 };
111 
113 {
    /// Search database record; the comparison operators below name the
    /// type SearchDatabase. Only string members are visible here; the
    /// listing numbering skips lines, so further members may exist.
115 
116  std::string localPath;
117  std::string databaseName;
121  std::string type;
122 
    /// write()/read() are declared only; definitions are elsewhere.
123  void write(XMLWriter& writer) const;
124  void read(std::istream& is);
125 
126  bool operator==(const SearchDatabase& that) const;
127  bool operator!=(const SearchDatabase& that) const;
128 
129 };
130 
132 {
    /// Light/heavy ratio record; the constructor names the type
    /// Q3RatioResult and zero-initializes every member it lists.
    // NOTE(review): the constructor initializes lightFirstScan,
    // lightLastScan, heavyFirstScan and heavyLastScan, but their
    // declarations are not visible in this listing.
133  Q3RatioResult() : lightFirstScan(0), lightLastScan(0), lightMass(0), heavyFirstScan(0), heavyLastScan(0), heavyMass(0), lightArea(0), heavyArea(0), q2LightArea(0), q2HeavyArea(0), decimalRatio(0) {}
134 
137  double lightMass;
140  double heavyMass;
141  double lightArea;
142  double heavyArea;
143  double q2LightArea;
144  double q2HeavyArea;
145  double decimalRatio;
146 
    /// write()/read() are declared only; definitions are elsewhere.
147  void write(XMLWriter& writer) const;
148  void read(std::istream& is);
149 
150  bool operator==(const Q3RatioResult& that) const;
151  bool operator!=(const Q3RatioResult& that) const;
152 
153 };
154 
156 {
    // NOTE(review): the struct header is not visible in this listing.
    // Presumably this is the RocDataPoint type stored in roc_data_point
    // further down -- confirm. No constructor is visible, so the
    // numeric members below have no visible initializer.
157  double min_prob;
158  double sensitivity;
159  double error;
160  long num_corr;
162 };
163 
165 {
    // NOTE(review): the struct header is not visible in this listing.
    // Presumably this is the ErrorPoint type stored in error_point
    // further down -- confirm. No constructor is visible, so the
    // numeric members below have no visible initializer.
166  double error;
167  double min_prob;
168  long num_corr;
170 };
171 
173 {
    // NOTE(review): the struct header is not visible in this listing.
    // Presumably this is the DistributionPoint type stored in
    // distribution_point further down -- confirm. The listing numbering
    // jumps from 174 to 184, so most members are missing from this view.
174  double fvalue;
184 };
185 
187 {
    // NOTE(review): the struct header is not visible in this listing.
    // Presumably this is the MixtureModel type stored in mixture_model
    // further down -- confirm. The listing numbering skips lines around
    // the single visible member, so other members are likely missing.
189  std::string comments;
194 
195  // TODO Child tags go here... Don't forget to add the struct for
196  // them too
197 };
198 
200 {
    // NOTE(review): the struct header is not visible in this listing.
    // Presumably this is the PeptideProphetSummary type referenced by
    // peptideprophet_summary further down -- confirm. No constructor is
    // visible, so min_prob has no visible initializer.
201  std::string version;
202  std::string author;
203  double min_prob;
204  std::string options;
206 
    /// Collections of input file names and of the point/model records
    /// whose element types are named in the template arguments.
207  std::vector<std::string> inputFile;
208  std::vector<RocDataPoint> roc_data_point;
209  std::vector<ErrorPoint> error_point;
210  std::vector<DistributionPoint> distribution_point;
211  std::vector<MixtureModel> mixture_model;
212 };
213 
215 {
    /// Result record; the comparison operators below name the type
    /// PeptideProphetResult. The listing numbering skips 216, so a line
    /// (possibly a constructor) is missing from this view.
217 
218  double probability;
219  std::vector<double> allNttProb;
220  std::string analysis;
221 
222  // TODO add search_score_summary and its parameter (2+ occurrences)
223 
    /// write()/read() are declared only; definitions are elsewhere.
224  void write(XMLWriter& writer) const;
225  void read(std::istream& is);
226 
227  bool operator==(const PeptideProphetResult& that) const;
228  bool operator!=(const PeptideProphetResult& that) const;
229 
230 };
231 
233 {
    /// Analysis result record; the constructor names the type
    /// AnalysisResult and defaults analysis to "peptideprophet_result".
234  AnalysisResult() : analysis("peptideprophet_result") {}
235 
236  std::string analysis;
    // NOTE(review): the listing numbering skips 237-238 here, so member
    // declarations appear to be missing from this view.
239 
    /// write()/read() are declared only; definitions are elsewhere.
240  void write(XMLWriter& writer) const;
241  void read(std::istream& is);
242 
243  bool operator==(const AnalysisResult& that) const;
244  bool operator!=(const AnalysisResult& that) const;
245 
246 };
247 
249 {
    /// Alternative protein record; the comparison operators below name
    /// the type AlternativeProtein. All visible members are strings,
    /// including numTolTerm.
251 
252  std::string protein;
253  std::string proteinDescr;
254  std::string numTolTerm;
255 
    /// write()/read() are declared only; definitions are elsewhere.
256  void write(XMLWriter& writer) const;
257  void read(std::istream& is);
258 
259  bool operator==(const AlternativeProtein& that) const;
260  bool operator!=(const AlternativeProtein& that) const;
261 
262 };
263 
265 {
    /// Position/mass pair; the constructor names the type
    /// ModAminoAcidMass and zero-initializes both members.
266  ModAminoAcidMass() : position(0), mass(0) {}
267 
268  int position;
269  double mass;
270 
    /// write()/read() are declared only; definitions are elsewhere.
271  void write(XMLWriter& writer) const;
272  void read(std::istream& is);
273 
274  bool operator==(const ModAminoAcidMass& that) const;
275  bool operator!=(const ModAminoAcidMass& that) const;
276 
277 };
278 
280 {
    /// Modification record; the comparison operators below name the
    /// type ModificationInfo.
    // NOTE(review): the listing numbering skips 281 and 284, so lines
    // around modifiedPeptide are missing from this view.
282 
283  std::string modifiedPeptide;
285 
    /// write()/read() are declared only; definitions are elsewhere.
286  void write(XMLWriter& writer) const;
287  void read(std::istream& is);
288 
289  bool operator==(const ModificationInfo& that) const;
290  bool operator!=(const ModificationInfo& that) const;
291 
292 };
293 
295 {
    /// Name/value string pair; the constructor names the type Parameter.
    /// Both constructor arguments default to the empty string, so the
    /// type is default-constructible.
296  Parameter(const std::string& name = "", const std::string& value = "")
297  : name(name), value(value)
298  {}
299 
300  std::string name;
301  std::string value;
302 
    /// write()/read() are declared only; definitions are elsewhere.
303  void write(XMLWriter& writer) const;
304  void read(std::istream& is);
305 
306  bool operator==(const Parameter& that) const;
307  bool operator!=(const Parameter& that) const;
308 };
309 
310 typedef boost::shared_ptr<Parameter> ParameterPtr;
311 
312 
314 {
    /// Name/value score; the constructor names the type SearchScore and
    /// forwards both arguments to Parameter(name, value), so name and
    /// value come from Parameter. Both arguments default to "".
315  SearchScore(const std::string& name = "", const std::string& value = "")
316  : Parameter(name, value)
317  {}
318 
    /// Declares its own write()/read(); definitions are elsewhere.
319  void write(XMLWriter& writer) const;
320  void read(std::istream& is);
321 
322 };
323 
324 typedef boost::shared_ptr<SearchScore> SearchScorePtr;
325 
326 
328 {
    /// Search hit record; the constructor names the type SearchHit and
    /// zero-initializes every numeric member it lists.
    // NOTE(review): the constructor initializes numTotalProteins,
    // numMatchedIons, totalNumIons, calcNeutralPepMass, numTolTerm and
    // numMissedCleavages, but their declarations are not visible in
    // this listing (the numbering skips 337-340, 342-343, 345, 347).
329  SearchHit() : hitRank(0),numTotalProteins(0), numMatchedIons(0), totalNumIons(0), calcNeutralPepMass(0), massDiff(0), numTolTerm(0), numMissedCleavages(0), isRejected(0) {}
330 
331  int hitRank;
332  std::string peptide;
333  std::string peptidePrevAA;
334  std::string peptideNextAA;
335  std::string protein;
336  std::string proteinDescr;
341  double massDiff;
344  int isRejected; // bool?
346  std::vector<AlternativeProtein> alternativeProteins;
348 
    /// Scores are held through SearchScorePtr (boost::shared_ptr).
349  std::vector<SearchScorePtr> searchScore;
350 
    /// write()/read() are declared only; definitions are elsewhere.
351  void write(XMLWriter& writer) const;
352  void read(std::istream& is);
353 
354  bool operator==(const SearchHit& that) const;
355  bool operator!=(const SearchHit& that) const;
356 
357 };
358 
359 typedef boost::shared_ptr<SearchHit> SearchHitPtr;
360 
361 
/// Equality for two SearchHitPtr handles, each taken by value. Only
/// declared here; whether it compares the pointed-to SearchHit objects
/// or the pointers themselves cannot be told from this listing.
362 PWIZ_API_DECL bool operator==(const SearchHitPtr left, const SearchHitPtr right);
363 
365 {
    /// Search result record; the constructor names the type
    /// SearchResult. searchId defaults to 0, and because the
    /// constructor is not explicit a size_t converts implicitly.
366  SearchResult(size_t searchId = 0) :searchId(searchId){}
367 
368  /// Unique identifier to search summary
369  size_t searchId;
370 
    /// Hits are held through SearchHitPtr (boost::shared_ptr).
371  std::vector<SearchHitPtr> searchHit;
372 
    /// write()/read() are declared only; definitions are elsewhere.
373  void write(XMLWriter& writer) const;
374  void read(std::istream& is);
375 
376  bool operator==(const SearchResult& that) const;
377  bool operator!=(const SearchResult& that) const;
378 
379 };
380 
381 typedef boost::shared_ptr<SearchResult> SearchResultPtr;
382 
384 
385 
387 {
    /// Enzymatic constraint record; the constructor names the type
    /// EnzymaticSearchConstraint.
    // NOTE(review): the constructor zero-initializes
    // maxNumInternalCleavages and minNumTermini, but their declarations
    // are not visible in this listing (the numbering skips 391-392).
388  EnzymaticSearchConstraint() : maxNumInternalCleavages(0), minNumTermini(0){}
389 
390  std::string enzyme;
393 
    /// write()/read() are declared only; definitions are elsewhere.
394  void write(XMLWriter& writer) const;
395  void read(std::istream& is);
396 
397  bool operator==(const EnzymaticSearchConstraint& that) const;
398  bool operator!=(const EnzymaticSearchConstraint& that) const;
399 
400 };
401 
403 {
    /// Amino acid modification record; the constructor names the type
    /// AminoAcidModification and zero-initializes massDiff and mass.
    /// The remaining members are strings and start empty.
404  AminoAcidModification() : massDiff(0), mass(0) {}
405 
406  std::string aminoAcid;
407  double massDiff;
408  double mass;
409  std::string variable;
410  std::string peptideTerminus;
411  std::string binary;
412  std::string description;
413  std::string symbol;
414 
    /// write()/read() are declared only; definitions are elsewhere.
415  void write(XMLWriter& writer) const;
416  void read(std::istream& is);
417 
418  bool operator==(const AminoAcidModification& that) const;
419  bool operator!=(const AminoAcidModification& that) const;
420 
421 };
422 
423 /// Database search settings
425 {
    /// The comparison operators below name the type SearchSummary.
    // NOTE(review): the listing numbering skips 426, 446 and 451, so a
    // line near the top and two member declarations are missing here.
427 
428  /// Full path location of mzXML file for this search run (without
429  /// the .mzXML extension)
430  std::string baseName;
431 
432  /// SEQUEST, Mascot, COMET, etc
433  std::string searchEngine;
434 
435  /// average or monoisotopic
436  std::string precursorMassType;
437 
438  /// average or monoisotopic
439  std::string fragmentMassType;
440 
441  /// Format of file storing the runner up peptides (if not present
442  /// in pepXML)
    // NOTE(review): the description above does not obviously describe
    // a member called searchID -- confirm which member it belongs to.
    // Note also that this struct has both searchID (string) and
    // search_id (size_t).
443  std::string searchID;
444 
445  /// runner up search hit data type extension (e.g. .tgz)
    // NOTE(review): the member this comment documents is not visible
    // in this listing.
447 
448  /// matches id in search hit
449  size_t search_id;
450 
452  std::vector<AminoAcidModification> aminoAcidModifications;
453 
    /// Parameters are held through ParameterPtr (boost::shared_ptr).
454  std::vector<ParameterPtr> parameters;
455 
    /// write()/read() are declared only; definitions are elsewhere.
456  void write(XMLWriter& writer) const;
457  void read(std::istream& is);
458 
459  bool operator==(const SearchSummary& that) const;
460  bool operator!=(const SearchSummary& that) const;
461 
462 };
463 
464 typedef boost::shared_ptr<SearchSummary> SearchSummaryPtr;
465 
/// Equality for two SearchSummaryPtr handles, each taken by value. Only
/// declared here; whether it compares the pointed-to SearchSummary
/// objects or the pointers themselves cannot be told from this listing.
466 PWIZ_API_DECL bool operator==(const SearchSummaryPtr left, const SearchSummaryPtr right);
467 
468 
469 /// Reference for analysis applied to current run (time corresponds
470 /// with analysis_summary/@time, id corresponds with
471 /// analysis_result/@id)
473 {
    // NOTE(review): the struct header is not visible in this listing,
    // so the type name cannot be confirmed from here. No constructor
    // is visible, so id has no visible initializer.
474  /// Date of analysis
475  std::string time;
476 
477  /// Analysis name
    // NOTE(review): the member name is spelled "analsysis". Renaming
    // it would change the public interface, so it is left as is.
478  std::string analsysis;
479 
480  /// Unique identifier for each type of analysis
481  size_t id;
482 
483  // Evil ##any data goes here
484 };
485 
486 
488 {
    /// Spectrum query record; the constructor names the type
    /// SpectrumQuery and zero-initializes every numeric member it lists.
    // NOTE(review): the constructor initializes startScan,
    // precursorNeutralMass, assumedCharge and retentionTimeSec, but
    // their declarations are not visible in this listing (the
    // numbering skips 494, 499, 502 and 508). Several comments below
    // therefore sit above a gap or above the wrong member.
489  SpectrumQuery() : startScan(0), endScan(0), precursorNeutralMass(0), assumedCharge(0), index(0), retentionTimeSec(0) {}
490 
491  std::string spectrum;
492 
493  /// first scan number integrated into MS/MS spectrum
495 
496  /// last scan number integrated into MS/MS spectrum
497  int endScan;
498 
500 
501  /// Precursor ion charge used for search
503 
504  /// Search constraint applied specifically to this query
    // NOTE(review): this description does not obviously describe a
    // member called index -- confirm which member it belongs to.
505  int index;
506 
507  /// Unique identifier
509 
    /// Results are held through SearchResultPtr (boost::shared_ptr).
510  std::vector<SearchResultPtr> searchResult;
511 
    /// write()/read() are declared only; definitions are elsewhere.
512  void write(XMLWriter& writer) const;
513  void read(std::istream& is);
514 
515  bool operator==(const SpectrumQuery& that) const;
516  bool operator!=(const SpectrumQuery& that) const;
517 
518 };
519 
520 typedef boost::shared_ptr<SpectrumQuery> SpectrumQueryPtr;
521 
/// Equality for two SpectrumQueryPtr handles, each taken by value. Only
/// declared here; whether it compares the pointed-to SpectrumQuery
/// objects or the pointers themselves cannot be told from this listing.
522 PWIZ_API_DECL bool operator==(const SpectrumQueryPtr left, const SpectrumQueryPtr right);
523 
525 {
    /// Run summary record; the comparison operators below name the type
    /// MSMSRunSummary.
    // NOTE(review): the listing numbering skips 526 and 537, so a line
    // near the top and one member declaration are missing here.
527 
528  std::string base_name;
529  std::string raw_data_type;
530  std::string raw_data;
531  std::string msManufacturer;
532  std::string msModel;
533  std::string msIonization;
534  std::string msMassAnalyzer;
535  std::string msDetector;
536 
    /// Search summaries and spectrum queries are held through
    /// boost::shared_ptr typedefs.
538  std::vector<SearchSummaryPtr> searchSummary;
539  std::vector<SpectrumQueryPtr> spectrumQueries;
540 
    /// write()/read() are declared only; definitions are elsewhere.
541  void write(XMLWriter& writer) const;
542  void read(std::istream& is);
543 
544  bool operator==(const MSMSRunSummary& that) const;
545  bool operator!=(const MSMSRunSummary& that) const;
546 
547 };
548 
550 {
    /// Analysis summary record; the AnalysisSummaryPtr typedef that
    /// follows this struct names the type AnalysisSummary. No write(),
    /// read() or comparison operators are visible for it.
551  /// Time analysis complete (unique id)
552  std::string time;
553 
554  /// Name of analysis program
555  std::string analysis;
556 
557  /// Release
558  std::string version;
559 
560  // All the unknown stuff goes here
561 
562  // TODO deal with the results of
563  // <xs:any namespace="##any" processContents="lax" minOccurs="0">
564  std::vector<PeptideProphetSummary> peptideprophet_summary;
565 };
566 
567 typedef boost::shared_ptr<AnalysisSummary> AnalysisSummaryPtr;
568 
569 
571 {
    /// Data filter record; the DataFilterPtr typedef that follows this
    /// struct names the type DataFilter. No constructor is visible, so
    /// number has no visible initializer.
572  size_t number;
573 
574  /// File from which derived
575  std::string parent_file;
576 
577  std::string windows_parent;
578 
579  /// filtering criteria applied to data
580  std::string description;
581 };
582 
583 typedef boost::shared_ptr<DataFilter> DataFilterPtr;
584 
585 
586 /// Source and filtering criteria used to generate dataset
588 {
    /// The DatasetDerivationPtr typedef that follows this struct names
    /// the type DatasetDerivation.
589  /// number preceding filter generations
    // NOTE(review): the member this comment documents is not visible
    // in this listing (the numbering skips 590).
591 
    /// Filters are held through DataFilterPtr (boost::shared_ptr).
592  std::vector<DataFilterPtr> dataFilters;
593 };
594 
595 typedef boost::shared_ptr<DatasetDerivation> DatasetDerivationPtr;
596 
597 
599 {
    /// Top-level analysis record; the comparison operators below name
    /// the type MSMSPipelineAnalysis.
    // NOTE(review): the listing numbering skips 600 and 632-634, so a
    // line near the top and several member declarations are missing.
601 
602  std::string date;
603  std::string summaryXML;
604  std::string xmlns;
605  std::string xmlnsXSI;
606  std::string XSISchemaLocation;
607 
608  /// full path file name of mzXML (minus the .mzXML)
609  std::string baseName;
610 
611  /// raw data type extension (e.g. .mzXML)
612  std::string raw_data_type;
613 
614  /// raw data type extension (e.g. .mzXML)
    // NOTE(review): same wording as the raw_data_type comment above;
    // confirm the intended description of raw_data.
615  std::string raw_data;
616 
617  /// Manufacturer of MS/MS instrument
618  std::string msManufacturer;
619 
620  /// Instrument model (cf mzXML)
621  std::string msModel;
622 
623  /// Instrument model (cf mzXML)
    // NOTE(review): same wording as the msModel comment above; confirm
    // the intended description of msIonization.
624  std::string msIonization;
625 
626  /// Ion trap, etc (cf mzXML)
627  std::string msMassAnalyzer;
628 
629  /// EMT, etc(cf mzXML)
630  std::string msDetector;
631 
635 
    /// write()/read() are declared only; definitions are elsewhere.
636  void write(XMLWriter& writer) const;
637  void read(std::istream& is);
638 
639  bool operator==(const MSMSPipelineAnalysis& that) const;
640  bool operator!=(const MSMSPipelineAnalysis& that) const;
641 
642 };
643 
645 {
    /// Pairs a spectrum query with a feature and a score; the
    /// constructors name the type Match. The default constructor sets
    /// score to 0 and allocates a new Feature; the second constructor
    /// copies the given query and feature handle, with score
    /// defaulting to 0.
    // NOTE(review): the constructors initialize spectrumQuery and
    // feature, whose declarations are not visible in this listing (the
    // numbering skips 650 and 653-654). Neither visible constructor
    // initializes massDeviation.
646  Match() : score(0), feature(new Feature()) {}
647  Match(const SpectrumQuery& _spectrumQuery, FeaturePtr _feature, double _score = 0) : score(_score), spectrumQuery(_spectrumQuery), feature(_feature) {}
648 
649  double score;
651  double massDeviation; // ( feature mz - proton mass ) * charge - calculatedMass (not absolute val!)
652 
655 
    /// write()/read() are declared only; definitions are elsewhere.
656  void write(minimxml::XMLWriter& writer) const;
657  void read(std::istream& is);
658 
659  bool operator==(const Match& that) const;
660  bool operator!=(const Match& that) const;
661 
662 private:
    // Copy construction and assignment are declared private with no
    // body visible here, presumably to forbid copying -- confirm.
663  Match(Match&);
664  Match operator=(Match&);
665 
666 };
667 
668 typedef boost::shared_ptr<Match> MatchPtr;
669 
/// Equality for two MatchPtr handles, each taken by value. Only
/// declared here; whether it compares the pointed-to Match objects or
/// the pointers themselves cannot be told from this listing.
670 PWIZ_API_DECL bool operator==(const MatchPtr left, const MatchPtr right);
671 
672 
674 {
    /// Collection of matches plus two calculator names; the
    /// constructors name the type MatchData. One constructor stores the
    /// two strings, the other stores the vector of matches; each takes
    /// its arguments by value and copies them into the members.
    // NOTE(review): the listing numbering skips 675 and 679. The
    // warpFunctionCalculator member initialized below has no visible
    // declaration, and a further constructor may be missing.
676  MatchData(std::string wfc, std::string snc) : warpFunctionCalculator(wfc), searchNbhdCalculator(snc) {}
677  MatchData(std::vector<MatchPtr> _matches) : matches(_matches){}
678 
680  std::string searchNbhdCalculator;
681  std::vector<MatchPtr> matches;
682 
    /// write()/read() are declared only; definitions are elsewhere.
683  void write(minimxml::XMLWriter& writer) const;
684  void read(std::istream& is);
685 
686  bool operator==(const MatchData& that) const;
687  bool operator!=(const MatchData& that) const;
688 
689 };
690 
691 } // namespace pepxml
692 } // namespace data
693 } // namespace pwiz
694 
695 
696 
697 #endif // _MINIMUMPEPXML_HPP_
698 
699 // LocalWords: RatioResult