ProteoWizard
IdentDataTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: IdentDataTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Robert Burke <robert.burke@proteowizard.org>
6 //
7 // Copyright 2009 Spielberg Family Center for Applied Proteomics
8 // University of Southern California, Los Angeles, California 90033
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #define PWIZ_SOURCE
25 
29 #include "IdentData.hpp"
30 #include "Serializer_mzid.hpp"
31 #include "examples.hpp"
32 #include "Diff.hpp"
33 #include "TextWriter.hpp"
34 
35 
36 using namespace pwiz::identdata;
37 using namespace pwiz::identdata::examples;
38 using namespace pwiz::util;
39 using namespace pwiz::data;
40 namespace proteome = pwiz::proteome;
41 
42 
// Optional stream for verbose test output. main() points it at cout when the
// first command-line argument is "-v"; code that writes to it checks it for
// null first.
43 ostream* os_;
44 
45 
// Body of a test routine exercising digestedPeptides() / digestedPeptide().
//
// NOTE(review): the function signature (listing line 46) and listing line 51
// (between the declaration of `mzid` and its first use) are missing from this
// extract. Presumably line 51 fills `mzid` with example data -- confirm against
// the original file before relying on this listing.
//
// The routine picks the first SpectrumIdentificationProtocol (`sip`) and the
// second SpectrumIdentificationResult (`result2`) of the first
// SpectrumIdentificationList, then repeatedly mutates `sip->enzymes` and checks
// the digestion results for the rank 1 item (and, in most sections, the rank 2
// item). Each section asserts:
//   - how many DigestedPeptides digestedPeptides() returns,
//   - that digestedPeptide() on each PeptideEvidence equals the matching element,
//   - missedCleavages(), specificTermini(), N/C terminus specificity,
//   - and, in some sections, offset() and the prefix/suffix residues.
//
// The sections depend on each other: every one inherits the enzyme settings and
// PeptideEvidence edits made by the sections before it, so their order matters.
47 {
48  using namespace pwiz::proteome;
49 
50  IdentData mzid;
52 
53  SpectrumIdentificationProtocolPtr sip = mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0];
54  SpectrumIdentificationListPtr sil = mzid.dataCollection.analysisData.spectrumIdentificationList[0];
55 
56  SpectrumIdentificationResultPtr result2 = sil->spectrumIdentificationResult[1];
57 
58  // test with multiple simultaneous enzymes (Lys-C/P and Arg-C)
59  {
60  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
61 
62  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
63 
64  // both termini are specific now, one cut from each enzyme
65  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
66  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
67  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
68  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
69  unit_assert_operator_equal(2, result2_rank1_digestedPeptides[0].specificTermini());
70  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
71  unit_assert(result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
72  }
73 
74  // test with multiple independent enzymes (Lys-C/P and Arg-C)
75  sip->enzymes.independent = true;
76  {
77  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
78 
79  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
80 
81  // reassign the original prefix residue
82  result2_rank1->peptideEvidencePtr[0]->pre = 'K';
83 
84  // there are two semi-specific peptides, one cut by Lys-C and the other cut by Arg-C;
85  // only the first one will be returned because they have the same "best specificity"
86 
87  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
88  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
89  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
90  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
91  unit_assert_operator_equal(1, result2_rank1_digestedPeptides[0].specificTermini());
92  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
93  unit_assert(!result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
94  }
95 
96  // change from multiple enzymes to trypsin/p and test again
    // (the enzyme list is emptied first, so `trypsin` becomes enzymes[0], which
    // the later sections modify in place)
97  sip->enzymes.enzymes.clear();
98  EnzymePtr trypsin(new Enzyme);
99  trypsin->id = "ENZ_1";
100  trypsin->cTermGain = "OH";
101  trypsin->nTermGain = "H";
102  trypsin->missedCleavages = 2;
103  trypsin->minDistance = 1;
104  trypsin->terminalSpecificity = proteome::Digestion::FullySpecific;
105  trypsin->siteRegexp = "(?<=[KR])";
106  trypsin->enzymeName.set(MS_Trypsin_P);
107  sip->enzymes.enzymes.push_back(trypsin);
108 
109  {
110  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
111  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
112  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
113  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
114  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
115  unit_assert_operator_equal(423, result2_rank1_digestedPeptides[0].offset());
116  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
117  unit_assert_operator_equal(2, result2_rank1_digestedPeptides[0].specificTermini());
118  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
119  unit_assert(result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
120  unit_assert_operator_equal("K", result2_rank1_digestedPeptides[0].NTerminusPrefix());
121  unit_assert_operator_equal("A", result2_rank1_digestedPeptides[0].CTerminusSuffix());
122 
123  // result 2 rank 2: K.RNSTIPT.K
124  SpectrumIdentificationItemPtr result2_rank2 = result2->spectrumIdentificationItem[1];
125  vector<DigestedPeptide> result2_rank2_digestedPeptides = digestedPeptides(*sip, *result2_rank2);
126  unit_assert_operator_equal(2, result2_rank2_digestedPeptides.size());
127 
128  // both PeptideEvidences have the same values
129  for (int i=0; i < 2; ++i)
130  {
131  unit_assert(digestedPeptide(*sip, *result2_rank2->peptideEvidencePtr[i]) == result2_rank2_digestedPeptides[i]);
132  unit_assert_operator_equal(415, result2_rank2_digestedPeptides[i].offset());
133  unit_assert_operator_equal(1, result2_rank2_digestedPeptides[i].missedCleavages());
134  unit_assert_operator_equal(1, result2_rank2_digestedPeptides[i].specificTermini());
135  unit_assert(result2_rank2_digestedPeptides[i].NTerminusIsSpecific());
136  unit_assert(!result2_rank2_digestedPeptides[i].CTerminusIsSpecific());
137  unit_assert_operator_equal("K", result2_rank2_digestedPeptides[i].NTerminusPrefix());
138  unit_assert_operator_equal("K", result2_rank2_digestedPeptides[i].CTerminusSuffix());
139  }
140  }
141 
142  // change enzyme from trypsin to Lys-C and test again
    // (enzymeName is cleared and only siteRegexp is reassigned)
143  sip->enzymes.enzymes[0]->enzymeName.clear();
144  sip->enzymes.enzymes[0]->siteRegexp = "(?<=K)";
145 
146  {
147  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
148  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
149  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
150  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
151  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
152  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
153  unit_assert_operator_equal(1, result2_rank1_digestedPeptides[0].specificTermini());
154  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
155  unit_assert(!result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
156 
157  // result 2 rank 2: K.RNSTIPT.K
158  SpectrumIdentificationItemPtr result2_rank2 = result2->spectrumIdentificationItem[1];
159  vector<DigestedPeptide> result2_rank2_digestedPeptides = digestedPeptides(*sip, *result2_rank2);
160  unit_assert_operator_equal(2, result2_rank2_digestedPeptides.size());
161 
162  // both PeptideEvidences have the same values
163  for (int i=0; i < 2; ++i)
164  {
165  unit_assert(digestedPeptide(*sip, *result2_rank2->peptideEvidencePtr[i]) == result2_rank2_digestedPeptides[i]);
166  unit_assert_operator_equal(0, result2_rank2_digestedPeptides[i].missedCleavages());
167  unit_assert_operator_equal(1, result2_rank2_digestedPeptides[i].specificTermini());
168  unit_assert(result2_rank2_digestedPeptides[i].NTerminusIsSpecific());
169  unit_assert(!result2_rank2_digestedPeptides[i].CTerminusIsSpecific());
170  }
171  }
172 
173  // change enzyme from Lys-C to unspecific cleavage and test again
    // (siteRegexp is emptied here and stays empty until the Lys-N section)
174  sip->enzymes.enzymes[0]->enzymeName.set(MS_unspecific_cleavage);
175  sip->enzymes.enzymes[0]->siteRegexp.clear();
176 
177  {
178  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
179  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
180  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
181  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
182  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
183  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
184  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].specificTermini());
185  unit_assert(!result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
186  unit_assert(!result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
187 
188  // result 2 rank 2: K.RNSTIPT.K
189  SpectrumIdentificationItemPtr result2_rank2 = result2->spectrumIdentificationItem[1];
190  vector<DigestedPeptide> result2_rank2_digestedPeptides = digestedPeptides(*sip, *result2_rank2);
191  unit_assert_operator_equal(2, result2_rank2_digestedPeptides.size());
192 
193  // both PeptideEvidences have the same values
194  for (int i=0; i < 2; ++i)
195  {
196  unit_assert(digestedPeptide(*sip, *result2_rank2->peptideEvidencePtr[i]) == result2_rank2_digestedPeptides[i]);
197  unit_assert_operator_equal(0, result2_rank2_digestedPeptides[i].missedCleavages());
198  unit_assert_operator_equal(0, result2_rank2_digestedPeptides[i].specificTermini());
199  unit_assert(!result2_rank2_digestedPeptides[i].NTerminusIsSpecific());
200  unit_assert(!result2_rank2_digestedPeptides[i].CTerminusIsSpecific());
201  }
202  }
203 
204  // change enzyme from unspecific cleavage to no cleavage and test again
205  sip->enzymes.enzymes[0]->enzymeName.clear();
206  sip->enzymes.enzymes[0]->enzymeName.set(MS_no_cleavage);
207 
208  {
209  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
210  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
211  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
212  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
213  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
214  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
215  unit_assert_operator_equal(2, result2_rank1_digestedPeptides[0].specificTermini());
216  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
217  unit_assert(result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
218 
219  // result 2 rank 2: K.RNSTIPT.K
220  SpectrumIdentificationItemPtr result2_rank2 = result2->spectrumIdentificationItem[1];
221  vector<DigestedPeptide> result2_rank2_digestedPeptides = digestedPeptides(*sip, *result2_rank2);
222  unit_assert_operator_equal(2, result2_rank2_digestedPeptides.size());
223 
224  // both PeptideEvidences have the same values
225  for (int i=0; i < 2; ++i)
226  {
227  unit_assert(digestedPeptide(*sip, *result2_rank2->peptideEvidencePtr[i]) == result2_rank2_digestedPeptides[i]);
228  unit_assert_operator_equal(0, result2_rank2_digestedPeptides[i].missedCleavages());
229  unit_assert_operator_equal(2, result2_rank2_digestedPeptides[i].specificTermini());
230  unit_assert(result2_rank2_digestedPeptides[i].NTerminusIsSpecific());
231  unit_assert(result2_rank2_digestedPeptides[i].CTerminusIsSpecific());
232  }
233  }
234 
235  // change enzyme from no cleavage to Lys-N and test again
    // (the lookahead regexp places the cleavage site before K rather than after it)
236  sip->enzymes.enzymes[0]->enzymeName.clear();
237  sip->enzymes.enzymes[0]->siteRegexp = "(?=K)";
238 
239  {
240  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
241  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
242  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
243  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
244  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
245  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
246  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].specificTermini());
247  unit_assert(!result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
248  unit_assert(!result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
249 
250  // result 2 rank 2: K.RNSTIPT.K
251  SpectrumIdentificationItemPtr result2_rank2 = result2->spectrumIdentificationItem[1];
252  vector<DigestedPeptide> result2_rank2_digestedPeptides = digestedPeptides(*sip, *result2_rank2);
253  unit_assert_operator_equal(2, result2_rank2_digestedPeptides.size());
254 
255  // both PeptideEvidences have the same values
256  for (int i=0; i < 2; ++i)
257  {
258  unit_assert(digestedPeptide(*sip, *result2_rank2->peptideEvidencePtr[i]) == result2_rank2_digestedPeptides[i]);
259  unit_assert_operator_equal(0, result2_rank2_digestedPeptides[i].missedCleavages());
260  unit_assert_operator_equal(1, result2_rank2_digestedPeptides[i].specificTermini());
261  unit_assert(!result2_rank2_digestedPeptides[i].NTerminusIsSpecific());
262  unit_assert(result2_rank2_digestedPeptides[i].CTerminusIsSpecific());
263  }
264  }
265 
    // Boundary cases: relocate the rank 1 peptide to each end of the sequence by
    // editing its PeptideEvidence. The enzyme is unchanged from the Lys-N section.
    // In both cases the asserted offset() is `start - 1` (618 -> 617, 1 -> 0).
266  {
267  // result 2 rank 1: K.QTQTFTTYSDNQPGVLIQVYEGER.A
268 
269  SpectrumIdentificationItemPtr result2_rank1 = result2->spectrumIdentificationItem[0];
270 
271  // move it to the C terminus
272  result2_rank1->peptideEvidencePtr[0]->start = 618;
273  result2_rank1->peptideEvidencePtr[0]->post = '-';
274 
275  vector<DigestedPeptide> result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
276  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
277  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
278  unit_assert_operator_equal(617, result2_rank1_digestedPeptides[0].offset());
279  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
280  unit_assert_operator_equal(1, result2_rank1_digestedPeptides[0].specificTermini());
281  unit_assert(!result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
282  unit_assert(result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
283  unit_assert_operator_equal("K", result2_rank1_digestedPeptides[0].NTerminusPrefix());
284  unit_assert_operator_equal("-", result2_rank1_digestedPeptides[0].CTerminusSuffix());
285 
286  // move it to the N terminus
287  result2_rank1->peptideEvidencePtr[0]->start = 1;
288  result2_rank1->peptideEvidencePtr[0]->pre = '-';
289  result2_rank1->peptideEvidencePtr[0]->post = 'A';
290 
291  result2_rank1_digestedPeptides = digestedPeptides(*sip, *result2_rank1);
292  unit_assert_operator_equal(1, result2_rank1_digestedPeptides.size());
293  unit_assert(digestedPeptide(*sip, *result2_rank1->peptideEvidencePtr[0]) == result2_rank1_digestedPeptides[0]);
294  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].offset());
295  unit_assert_operator_equal(0, result2_rank1_digestedPeptides[0].missedCleavages());
296  unit_assert_operator_equal(1, result2_rank1_digestedPeptides[0].specificTermini());
297  unit_assert(result2_rank1_digestedPeptides[0].NTerminusIsSpecific());
298  unit_assert(!result2_rank1_digestedPeptides[0].CTerminusIsSpecific());
299  unit_assert_operator_equal("-", result2_rank1_digestedPeptides[0].NTerminusPrefix());
300  unit_assert_operator_equal("A", result2_rank1_digestedPeptides[0].CTerminusSuffix());
301  }
302 }
303 
// Body of a test routine that compares two IdentData objects with Diff.
//
// NOTE(review): this extract is missing the function signature (listing line
// 304), listing lines 307-308 (presumably the code that populates `mzid` and
// `mzid2`), and listing line 322 (the body of the last BOOST_FOREACH). Confirm
// all three against the original file; as extracted, the last loop has no
// visible body of its own.
//
// Visible flow:
//   1. Clear the cvParams of every search modification and of every peptide
//      modification in `mzid2`.
//   2. Assert that Diff now reports a difference between `mzid` and `mzid2`.
//   3. Iterate over the SpectrumIdentification entries of `mzid2` (loop body
//      missing), re-run the diff, print it to `os_` if anything differs, and
//      assert that no difference remains.
305 {
306  IdentData mzid, mzid2;
309 
310  BOOST_FOREACH(SpectrumIdentificationProtocolPtr& sip, mzid2.analysisProtocolCollection.spectrumIdentificationProtocol)
311  BOOST_FOREACH(SearchModificationPtr& mod, sip->modificationParams)
312  mod->cvParams.clear();
313 
314  BOOST_FOREACH(PeptidePtr& pep, mzid2.sequenceCollection.peptides)
315  BOOST_FOREACH(ModificationPtr& mod, pep->modification)
316  mod->cvParams.clear();
317 
    // with the cvParams stripped from mzid2, the two documents must differ
318  Diff<IdentData, DiffConfig> diff(mzid, mzid2);
319  unit_assert(diff);
320 
321  BOOST_FOREACH(SpectrumIdentificationPtr& si, mzid2.analysisCollection.spectrumIdentification)
323 
    // re-run the comparison; the diff is only printed when it is non-empty and
    // verbose output is enabled
324  diff(mzid, mzid2);
325  if (diff && os_) *os_ << "diff:\n" << diff_string<TextWriter>(diff) << endl;
326  unit_assert(!diff);
327 }
328 
// Body of a test routine checking the sequence and modification map of two
// peptide objects, `pep2` and `pep5`.
//
// NOTE(review): this extract is heavily truncated. The function signature
// (listing line 329), the setup after `IdentData mzid;` (line 334), the
// declarations of `pep2` and `pep5` (lines 337 and 348), and the opening lines
// of several multi-line assertions are all missing. The lines below that
// consist only of `...monoisotopicDeltaMass());` are the tails of those
// assertions; the expected values they were compared against are not visible.
// Restore the missing lines from the original file before editing.
330 {
332 
333  IdentData mzid;
335 
336  // PEP_2: TAIGIDLGT[80]TYSC[57]VGVFQHGK
338  unit_assert_operator_equal("TAIGIDLGTTYSCVGVFQHGK", pep2.sequence());
    // modification lookups use offsets 8 and 12, i.e. the T and C annotated in
    // the PEP_2 comment above, counted from zero
342  pep2.modifications().find(8)->second.monoisotopicDeltaMass());
345  pep2.modifications().find(12)->second.monoisotopicDeltaMass());
346 
347  // PEP_5: RNS[80]TIPT[-1]
349  unit_assert_operator_equal("RNSTIPT", pep5.sequence());
    // two modifications expected: one at offset 2 (the S) and one keyed by
    // ModificationMap::CTerminus()
350  unit_assert_operator_equal(2, pep5.modifications().size());
351  unit_assert_operator_equal(1, pep5.modifications().count(2));
353  pep5.modifications().find(2)->second.monoisotopicDeltaMass());
354  unit_assert_operator_equal(1, pep5.modifications().count(ModificationMap::CTerminus()));
356  pep5.modifications().find(ModificationMap::CTerminus())->second.monoisotopicDeltaMass());
357 }
358 
// Body of a test routine that builds four Enzyme objects, each configured a
// different way: default-constructed, a "trypsin/p" UserParam on enzymeName,
// `name` set to "trypsin/p", and a site regular expression.
//
// NOTE(review): the function signature (listing line 359) and every assertion
// (listing lines 363-364, 370, 376, 382) are missing from this extract, so the
// property being verified for each configuration is not visible here. Restore
// them from the original file.
360 {
    // case 1: default-constructed enzyme
361  {
362  Enzyme ez;
365  }
366 
    // case 2: enzyme identified only by a user parameter
367  {
368  Enzyme ez;
369  ez.enzymeName.userParams.push_back(UserParam("trypsin/p"));
371  }
372 
    // case 3: enzyme identified only by its name attribute
373  {
374  Enzyme ez;
375  ez.name = "trypsin/p";
377  }
378 
    // case 4: enzyme described only by a cleavage-site regular expression
379  {
380  Enzyme ez;
381  ez.siteRegexp = "(?<=[KR])(?!P)";
383  }
384 }
385 
386 
// Test driver entry point.
//
// Passing "-v" as the first argument routes verbose output to cout via `os_`.
// The test routines run inside a try block; a std::exception or any other
// exception is reported through TEST_FAILED.
//
// NOTE(review): listing lines 396, 397 and 399 (presumably calls to the other
// test routines) and line 410 (the statement after the catch blocks,
// presumably the test epilogue / return) are missing from this extract.
// TEST_PROLOG and TEST_FAILED are not defined in the visible code; presumably
// they are test-harness macros -- confirm.
387 int main(int argc, char** argv)
388 {
389  TEST_PROLOG(argc, argv)
390 
391  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
392  if (os_) *os_ << "MzIdentMLTest\n";
393 
394  try
395  {
398  testConversion();
400  }
401  catch (exception& e)
402  {
403  TEST_FAILED(e.what())
404  }
405  catch (...)
406  {
407  TEST_FAILED("Caught unknown exception.")
408  }
409 
411 }