ProteoWizard
DigestionTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: DigestionTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Matt Chambers <matt.chambers .@. vanderbilt.edu>
6 //
7 // Copyright 2006 Louis Warschaw Prostate Cancer Center
8 // Cedars Sinai Medical Center, Los Angeles, California 90048
9 // Copyright 2008 Vanderbilt University - Nashville, TN 37232
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // Unless required by applicable law or agreed to in writing, software
18 // distributed under the License is distributed on an "AS IS" BASIS,
19 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 // See the License for the specific language governing permissions and
21 // limitations under the License.
22 //
23 
24 
26 #include "Peptide.hpp"
27 #include "Digestion.hpp"
29 #include "boost/thread/thread.hpp"
30 #include "boost/thread/barrier.hpp"
31 #include "boost/exception/all.hpp"
32 #include "boost/foreach_field.hpp"
33 
34 
35 using namespace pwiz::cv;
36 using namespace pwiz::util;
37 using namespace pwiz::proteome;
38 
39 
40 ostream* os_ = 0;
41 
42 
44 {
45  const set<CVID>& cleavageAgents = Digestion::getCleavageAgents();
46  const vector<string>& cleavageAgentNames = Digestion::getCleavageAgentNames();
47 
48  if (os_)
49  {
50  *os_ << "Cleavage agents:" << endl;
51  BOOST_FOREACH(CVID agentCvid, cleavageAgents)
52  {
53  *os_ << cvTermInfo(agentCvid).name << " ("
54  << Digestion::getCleavageAgentRegex(agentCvid)
55  << ")" << endl;
56  }
57 
58  *os_ << "\nCleavage agent names" << endl;
59  BOOST_FOREACH(string agentName, cleavageAgentNames)
60  {
61  *os_ << agentName << endl;
62  }
63  }
64 
65  unit_assert(cleavageAgents.size() >= 14);
66  unit_assert_operator_equal(MS_Trypsin, *cleavageAgents.begin());
67  unit_assert(!cleavageAgents.count(MS_NoEnzyme_OBSOLETE));
68  unit_assert(cleavageAgents.count(MS_no_cleavage));
69  unit_assert(cleavageAgents.count(MS_unspecific_cleavage));
70 
71  unit_assert_operator_equal(MS_Trypsin, Digestion::getCleavageAgentByName("TRYPSIN"));
72  unit_assert_operator_equal(MS_Trypsin, Digestion::getCleavageAgentByName("trypsin"));
73  unit_assert_operator_equal(MS_Trypsin_P, Digestion::getCleavageAgentByName("TRYPSIN/P"));
74  unit_assert_operator_equal(MS_Trypsin_P, Digestion::getCleavageAgentByName("trypsin/p"));
75  unit_assert_operator_equal(CVID_Unknown, Digestion::getCleavageAgentByName("ion trap"));
76  unit_assert_operator_equal(CVID_Unknown, Digestion::getCleavageAgentByName("!@#$%^&*"));
77 
78  unit_assert_operator_equal(MS_Trypsin, Digestion::getCleavageAgentByRegex("(?<=[KR])(?!P)"));
79  unit_assert_operator_equal(MS_Trypsin_P, Digestion::getCleavageAgentByRegex("(?<=[KR])"));
80  unit_assert_operator_equal(MS_Lys_C_P, Digestion::getCleavageAgentByRegex("(?<=K)"));
81  unit_assert_operator_equal(CVID_Unknown, Digestion::getCleavageAgentByRegex("!@#$%^&*"));
82 
83  unit_assert_operator_equal(MS_Trypsin, Digestion::getCleavageAgentByName(cleavageAgentNames[0]));
84  unit_assert_operator_equal(MS_Arg_C, Digestion::getCleavageAgentByName(cleavageAgentNames[1]));
85  unit_assert_operator_equal(MS_Asp_N, Digestion::getCleavageAgentByName(cleavageAgentNames[2]));
86  unit_assert_operator_equal(MS_Asp_N_ambic, Digestion::getCleavageAgentByName(cleavageAgentNames[3]));
87  unit_assert_operator_equal(MS_Chymotrypsin, Digestion::getCleavageAgentByName(cleavageAgentNames[4]));
88  unit_assert_operator_equal(MS_CNBr, Digestion::getCleavageAgentByName(cleavageAgentNames[5]));
89  unit_assert_operator_equal(MS_Formic_acid, Digestion::getCleavageAgentByName(cleavageAgentNames[6]));
90  unit_assert_operator_equal(MS_Lys_C, Digestion::getCleavageAgentByName(cleavageAgentNames[7]));
91  unit_assert_operator_equal(MS_Lys_C_P, Digestion::getCleavageAgentByName(cleavageAgentNames[8]));
92  unit_assert_operator_equal(MS_PepsinA, Digestion::getCleavageAgentByName(cleavageAgentNames[9]));
93  unit_assert_operator_equal(MS_TrypChymo, Digestion::getCleavageAgentByName(cleavageAgentNames[10]));
94  unit_assert_operator_equal(MS_Trypsin_P, Digestion::getCleavageAgentByName(cleavageAgentNames[11]));
95  unit_assert_operator_equal(MS_V8_DE, Digestion::getCleavageAgentByName(cleavageAgentNames[12]));
96  unit_assert_operator_equal(MS_V8_E, Digestion::getCleavageAgentByName(cleavageAgentNames[13]));
97 
98  unit_assert_operator_equal("(?<=[KR])(?!P)", Digestion::getCleavageAgentRegex(MS_Trypsin));
99  unit_assert_operator_equal("(?<=[EZ])(?!P)", Digestion::getCleavageAgentRegex(MS_V8_E));
100  unit_assert_throws(Digestion::getCleavageAgentRegex(MS_ion_trap), std::invalid_argument);
101 
102  unit_assert_operator_equal("(?=[BD])", Digestion::getCleavageAgentRegex(MS_Asp_N));
103  unit_assert_operator_equal("(?=[BNDD])", Digestion::disambiguateCleavageAgentRegex(Digestion::getCleavageAgentRegex(MS_Asp_N)));
104  unit_assert_operator_equal("(?=[A-Z])", Digestion::disambiguateCleavageAgentRegex("(?=X)"));
105  unit_assert_operator_equal("(?=[A-Z])", Digestion::disambiguateCleavageAgentRegex("(?=[X])"));
106  unit_assert_operator_equal("(?![BND])", Digestion::disambiguateCleavageAgentRegex("(?!B)"));
107  unit_assert_operator_equal("(?<![JIL])(?=[BNDK])", Digestion::disambiguateCleavageAgentRegex("(?<![J])(?=[BK])"));
108 }
109 
110 
112 {
113  bool operator() (const DigestedPeptide& lhs, const DigestedPeptide& rhs) const
114  {
115  return lhs.sequence() < rhs.sequence();
116  }
117 };
118 
120  const string& expectedSequence,
121  size_t expectedOffset,
122  size_t expectedMissedCleavages,
123  size_t expectedSpecificTermini,
124  const string& expectedPrefix,
125  const string& expectedSuffix)
126 {
127  try
128  {
129  unit_assert_operator_equal(expectedSequence, peptide.sequence());
130  unit_assert_operator_equal(expectedOffset, peptide.offset());
131  unit_assert_operator_equal(expectedMissedCleavages, peptide.missedCleavages());
132  unit_assert_operator_equal(expectedSpecificTermini, peptide.specificTermini());
133  unit_assert_operator_equal(expectedPrefix, peptide.NTerminusPrefix());
134  unit_assert_operator_equal(expectedSuffix, peptide.CTerminusSuffix());
135  return true;
136  }
137  catch(exception& e)
138  {
139  cerr << "Testing peptide " << peptide.sequence() << ": " << e.what() << endl;
140  return false;
141  }
142 }
143 
144 void testTrypticBSA(const Digestion& trypticDigestion)
145 {
146  if (os_) *os_ << "Fully-specific BSA digest (offset, missed cleavages, specific termini, length, sequence)" << endl;
147 
148  vector<DigestedPeptide> trypticPeptides(trypticDigestion.begin(), trypticDigestion.end());
149  set<DigestedPeptide, DigestedPeptideLessThan> trypticPeptideSet(trypticPeptides.begin(), trypticPeptides.end());
150 
151  if (os_)
152  {
153  BOOST_FOREACH(const DigestedPeptide& peptide, trypticPeptides)
154  {
155  *os_ << peptide.offset() << "\t" << peptide.missedCleavages() << "\t" <<
156  peptide.specificTermini() << "\t" << peptide.sequence().length() <<
157  "\t" << peptide.sequence() << "\n";
158  }
159  }
160 
161  // test count
162  unit_assert(trypticPeptides.size() > 4);
163 
164  // test order of enumeration and metadata: sequence, Off, NMC, NTT, Pre, Suf
165  unit_assert(testDigestionMetadata(trypticPeptides[0], "MKWVTFISLLLLFSSAYSR", 0, 1, 2, "", "G"));
166  unit_assert(testDigestionMetadata(trypticPeptides[1], "MKWVTFISLLLLFSSAYSRGVFR", 0, 2, 2, "", "R"));
167  unit_assert(testDigestionMetadata(trypticPeptides[2], "MKWVTFISLLLLFSSAYSRGVFRR", 0, 3, 2, "", "D"));
168  unit_assert(testDigestionMetadata(trypticPeptides[3], "KWVTFISLLLLFSSAYSR", 1, 1, 2, "M", "G"));
169 
170  // test for non-tryptic peptides
171  unit_assert(!trypticPeptideSet.count("MKWVTFISLLLL"));
172  unit_assert(!trypticPeptideSet.count("STQTALA"));
173 
174  // test some middle peptides
175  unit_assert(trypticPeptideSet.count("RDTHKSEIAHRFK"));
176  unit_assert(trypticPeptideSet.count("DTHKSEIAHRFK"));
177 
178  // test trypticPeptides at the C terminus
179  unit_assert(trypticPeptideSet.count("EACFAVEGPKLVVSTQTALA"));
180  unit_assert(trypticPeptides.back().sequence() == "LVVSTQTALA");
181 
182  // test maximum missed cleavages
183  unit_assert(!trypticPeptideSet.count("MKWVTFISLLLLFSSAYSRGVFRRDTHK"));
184  unit_assert(!trypticPeptideSet.count("LKPDPNTLCDEFKADEKKFWGKYLYEIARR"));
185 
186  // test minimum peptide length
187  unit_assert(!trypticPeptideSet.count("LR"));
188  unit_assert(!trypticPeptideSet.count("QRLR"));
189  unit_assert(trypticPeptideSet.count("VLASSARQRLR"));
190 
191  // test maximum peptide length
192  unit_assert(!trypticPeptideSet.count("MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFK"));
193 
194  // test methionine clipping at the N-terminus
195  unit_assert(trypticPeptideSet.count("KWVTFISLLLLFSSAYSR"));
196 }
197 
198 void testSemitrypticBSA(const Digestion& semitrypticDigestion)
199 {
200  if (os_) *os_ << "Semi-specific BSA digest (offset, missed cleavages, specific termini, length, sequence)" << endl;
201 
202  set<DigestedPeptide, DigestedPeptideLessThan>::const_iterator peptideItr;
203 
204  vector<DigestedPeptide> semitrypticPeptides(semitrypticDigestion.begin(), semitrypticDigestion.end());
205  set<DigestedPeptide, DigestedPeptideLessThan> semitrypticPeptideSet(semitrypticPeptides.begin(), semitrypticPeptides.end());
206 
207  if (os_)
208  {
209  BOOST_FOREACH(DigestedPeptide peptide, semitrypticPeptides)
210  {
211  *os_ << peptide.offset() << "\t" << peptide.missedCleavages() << "\t" <<
212  peptide.specificTermini() << "\t" << peptide.sequence().length() <<
213  "\t" << peptide.sequence() << "\n";
214  }
215  }
216 
217  // test count
218  unit_assert(semitrypticPeptides.size() > 3);
219 
220  // test order of enumeration and peptides at the N terminus
221  unit_assert_operator_equal("MKWVT", semitrypticPeptides[0].sequence());
222  unit_assert_operator_equal("MKWVTF", semitrypticPeptides[1].sequence());
223  unit_assert_operator_equal("MKWVTFI", semitrypticPeptides[2].sequence());
224 
225  // test order of enumeration and peptides at the C terminus
226  unit_assert_operator_equal("QTALA", semitrypticPeptides.rbegin()->sequence());
227  unit_assert_operator_equal("TQTALA", (semitrypticPeptides.rbegin()+1)->sequence());
228  unit_assert_operator_equal("STQTALA", (semitrypticPeptides.rbegin()+2)->sequence());
229  unit_assert_operator_equal("LVVSTQTALA", (semitrypticPeptides.rbegin()+5)->sequence());
230  unit_assert_operator_equal("LVVSTQTAL", (semitrypticPeptides.rbegin()+6)->sequence());
231  unit_assert_operator_equal("LVVST", (semitrypticPeptides.rbegin()+10)->sequence());
232 
233  // test digestion metadata
234  unit_assert_operator_equal(0, semitrypticPeptides[0].offset());
235  unit_assert_operator_equal(1, semitrypticPeptides[0].missedCleavages());
236  unit_assert_operator_equal(1, semitrypticPeptides[0].specificTermini());
237  unit_assert(semitrypticPeptides[0].NTerminusIsSpecific() &&
238  !semitrypticPeptides[0].CTerminusIsSpecific());
239 
240  peptideItr = semitrypticPeptideSet.find("MKWVTFISLLLLFSSAYSR");
241  unit_assert(peptideItr != semitrypticPeptideSet.end());
242  unit_assert_operator_equal(0, peptideItr->offset());
243  unit_assert_operator_equal(1, peptideItr->missedCleavages());
244  unit_assert_operator_equal(2, peptideItr->specificTermini());
245  unit_assert(peptideItr->NTerminusIsSpecific() &&
246  peptideItr->CTerminusIsSpecific());
247 
248  peptideItr = semitrypticPeptideSet.find("KWVTFISLLLLFSSAYSR");
249  unit_assert(peptideItr != semitrypticPeptideSet.end());
250  unit_assert_operator_equal(1, peptideItr->offset());
251  unit_assert_operator_equal(1, peptideItr->missedCleavages());
252  unit_assert_operator_equal(2, peptideItr->specificTermini());
253  unit_assert(peptideItr->NTerminusIsSpecific() &&
254  peptideItr->CTerminusIsSpecific());
255 
256  peptideItr = semitrypticPeptideSet.find("KWVTFISLLLLFSSAYSRG"); // 2 missed cleavages
257  unit_assert(peptideItr == semitrypticPeptideSet.end());
258 
259  peptideItr = semitrypticPeptideSet.find("WVTFISLLLLFSSAYSR");
260  unit_assert(peptideItr != semitrypticPeptideSet.end());
261  unit_assert_operator_equal(2, peptideItr->offset());
262  unit_assert_operator_equal(0, peptideItr->missedCleavages());
263  unit_assert_operator_equal(2, peptideItr->specificTermini());
264  unit_assert(peptideItr->NTerminusIsSpecific() &&
265  peptideItr->CTerminusIsSpecific());
266 
267  peptideItr = semitrypticPeptideSet.find("WVTFISLLLLFSSAYSRG");
268  unit_assert(peptideItr != semitrypticPeptideSet.end());
269  unit_assert_operator_equal(2, peptideItr->offset());
270  unit_assert_operator_equal(1, peptideItr->missedCleavages());
271  unit_assert_operator_equal(1, peptideItr->specificTermini());
272  unit_assert(peptideItr->NTerminusIsSpecific() &&
273  !peptideItr->CTerminusIsSpecific());
274 
275  peptideItr = semitrypticPeptideSet.find("VTFISLLLLFSSAYSRG"); // non-tryptic
276  unit_assert(peptideItr == semitrypticPeptideSet.end());
277 
278  // test for non-specific peptides
279  unit_assert(semitrypticPeptideSet.count("WVTFISLLLLFSSAYSR")); // tryptic
280  unit_assert(semitrypticPeptideSet.count("VTFISLLLLFSSAYSR")); // semi-tryptic
281  unit_assert(!semitrypticPeptideSet.count("VTFISLLLLFSSAYS")); // non-tryptic
282 
283  // test semi-specific peptides at the C terminus
284  unit_assert(semitrypticPeptideSet.count("FAVEGPKLVVSTQTALA")); // semi-tryptic
285  unit_assert(!semitrypticPeptideSet.count("FAVEGPKLVVSTQTAL")); // non-tryptic
286 }
287 
288 void testNontrypticBSA(const Digestion& nontrypticDigestion)
289 {
290  if (os_) *os_ << "Non-specific BSA digest (offset, missed cleavages, specific termini, length, sequence)" << endl;
291 
292  set<DigestedPeptide, DigestedPeptideLessThan>::const_iterator peptideItr;
293 
294  vector<DigestedPeptide> nontrypticPeptides(nontrypticDigestion.begin(), nontrypticDigestion.end());
295  set<DigestedPeptide, DigestedPeptideLessThan> nontrypticPeptideSet(nontrypticPeptides.begin(), nontrypticPeptides.end());
296 
297  if (os_)
298  {
299  BOOST_FOREACH(DigestedPeptide peptide, nontrypticPeptides)
300  {
301  *os_ << peptide.offset() << "\t" << peptide.missedCleavages() << "\t" <<
302  peptide.specificTermini() << "\t" << peptide.sequence().length() <<
303  "\t" << peptide.sequence() << "\n";
304  }
305  }
306 
307  // test count
308  unit_assert(nontrypticPeptides.size() > 3);
309 
310  // test order of enumeration and peptides at the N terminus
311  unit_assert_operator_equal("MKWVT", nontrypticPeptides[0].sequence());
312  unit_assert_operator_equal("MKWVTF", nontrypticPeptides[1].sequence());
313  unit_assert_operator_equal("MKWVTFI", nontrypticPeptides[2].sequence());
314 
315  // test digestion metadata
316  unit_assert_operator_equal(0, nontrypticPeptides[0].offset());
317  unit_assert_operator_equal(1, nontrypticPeptides[0].missedCleavages());
318  unit_assert_operator_equal(1, nontrypticPeptides[0].specificTermini());
319  unit_assert(nontrypticPeptides[0].NTerminusIsSpecific() &&
320  !nontrypticPeptides[0].CTerminusIsSpecific());
321 
322  peptideItr = nontrypticPeptideSet.find("MKWVTFISLLLLFSSAYSR");
323  unit_assert(peptideItr != nontrypticPeptideSet.end());
324  unit_assert_operator_equal(0, peptideItr->offset());
325  unit_assert_operator_equal(1, peptideItr->missedCleavages());
326  unit_assert_operator_equal(2, peptideItr->specificTermini());
327  unit_assert(peptideItr->NTerminusIsSpecific() &&
328  peptideItr->CTerminusIsSpecific());
329 
330  peptideItr = nontrypticPeptideSet.find("KWVTFISLLLLFSSAYSR");
331  unit_assert(peptideItr != nontrypticPeptideSet.end());
332  unit_assert_operator_equal(1, peptideItr->offset());
333  unit_assert_operator_equal(1, peptideItr->missedCleavages());
334  unit_assert_operator_equal(2, peptideItr->specificTermini());
335  unit_assert(peptideItr->NTerminusIsSpecific() &&
336  peptideItr->CTerminusIsSpecific());
337 
338  peptideItr = nontrypticPeptideSet.find("KWVTFISLLLLFSSAYSRG"); // 2 missed cleavages
339  unit_assert(peptideItr == nontrypticPeptideSet.end());
340 
341  peptideItr = nontrypticPeptideSet.find("WVTFISLLLLFSSAYSR");
342  unit_assert(peptideItr != nontrypticPeptideSet.end());
343  unit_assert_operator_equal(2, peptideItr->offset());
344  unit_assert_operator_equal(0, peptideItr->missedCleavages());
345  unit_assert_operator_equal(2, peptideItr->specificTermini());
346  unit_assert(peptideItr->NTerminusIsSpecific() &&
347  peptideItr->CTerminusIsSpecific());
348 
349  peptideItr = nontrypticPeptideSet.find("WVTFISLLLLFSSAYSRG");
350  unit_assert(peptideItr != nontrypticPeptideSet.end());
351  unit_assert_operator_equal(2, peptideItr->offset());
352  unit_assert_operator_equal(1, peptideItr->missedCleavages());
353  unit_assert_operator_equal(1, peptideItr->specificTermini());
354  unit_assert(peptideItr->NTerminusIsSpecific() &&
355  !peptideItr->CTerminusIsSpecific());
356 
357  peptideItr = nontrypticPeptideSet.find("VTFISLLLLFSSAYSRG");
358  unit_assert(peptideItr != nontrypticPeptideSet.end());
359  unit_assert_operator_equal(3, peptideItr->offset());
360  unit_assert_operator_equal(1, peptideItr->missedCleavages());
361  unit_assert_operator_equal(0, peptideItr->specificTermini());
362  unit_assert(!peptideItr->NTerminusIsSpecific() &&
363  !peptideItr->CTerminusIsSpecific());
364 
365  // test for peptides of all specificities
366  unit_assert(nontrypticPeptideSet.count("WVTFISLLLLFSSAYSR")); // tryptic
367  unit_assert(nontrypticPeptideSet.count("VTFISLLLLFSSAYSR")); // semi-tryptic
368  unit_assert(nontrypticPeptideSet.count("VTFISLLLLFSSAYS")); // non-tryptic
369 
370  // test non-specific peptides at the C terminus
371  unit_assert(nontrypticPeptideSet.count("FAVEGPKLVVSTQTALA")); // semi-tryptic
372  unit_assert(nontrypticPeptideSet.count("FAVEGPKLVVSTQTAL")); // non-tryptic
373  unit_assert_operator_equal("QTALA", nontrypticPeptides.back().sequence()); // semi-tryptic
374 
375  // test maximum missed cleavages
376  unit_assert(nontrypticPeptideSet.count("KWVTFISLLLLFSSAYSR"));
377  unit_assert(!nontrypticPeptideSet.count("KWVTFISLLLLFSSAYSRG"));
378 
379  // test minimum peptide length
380  unit_assert(!nontrypticPeptideSet.count("LR"));
381  unit_assert(!nontrypticPeptideSet.count("QRLR"));
382  unit_assert(nontrypticPeptideSet.count("VLASSAR"));
383 
384  // test maximum peptide length
385  unit_assert(!nontrypticPeptideSet.count("EYEATLEECCAKDDPHACYSTVFDK"));
386 }
387 
388 void testSemitrypticMethionineClippingBSA(const Digestion& semitrypticDigestion)
389 {
390  if (os_) *os_ << "Semi-specific BSA digest w/ methionine clipping (offset, missed cleavages, specific termini, length, sequence)" << endl;
391 
392  set<DigestedPeptide, DigestedPeptideLessThan>::const_iterator peptideItr;
393 
394  vector<DigestedPeptide> semitrypticPeptides(semitrypticDigestion.begin(), semitrypticDigestion.end());
395  set<DigestedPeptide, DigestedPeptideLessThan> semitrypticPeptideSet(semitrypticPeptides.begin(), semitrypticPeptides.end());
396 
397  if (os_)
398  {
399  BOOST_FOREACH(DigestedPeptide peptide, semitrypticPeptides)
400  {
401  *os_ << peptide.offset() << "\t" << peptide.missedCleavages() << "\t" <<
402  peptide.specificTermini() << "\t" << peptide.sequence().length() <<
403  "\t" << peptide.sequence() << "\n";
404  }
405  }
406 
407  // test count
408  unit_assert(semitrypticPeptides.size() > 3);
409 
410  // test order of enumeration and peptides at the N terminus;
411  // even with methionine clipping, MKWVT contains just one missed cleavage
412  unit_assert_operator_equal("MKWVT", semitrypticPeptides[0].sequence());
413  unit_assert_operator_equal("MKWVTF", semitrypticPeptides[1].sequence());
414  unit_assert_operator_equal("MKWVTFI", semitrypticPeptides[2].sequence());
415 
416  // test order of enumeration and peptides at the C terminus
417  unit_assert_operator_equal("QTALA", semitrypticPeptides.rbegin()->sequence());
418  unit_assert_operator_equal("TQTALA", (semitrypticPeptides.rbegin()+1)->sequence());
419  unit_assert_operator_equal("STQTALA", (semitrypticPeptides.rbegin()+2)->sequence());
420  unit_assert_operator_equal("LVVSTQTALA", (semitrypticPeptides.rbegin()+5)->sequence());
421  unit_assert_operator_equal("LVVSTQTAL", (semitrypticPeptides.rbegin()+6)->sequence());
422  unit_assert_operator_equal("LVVST", (semitrypticPeptides.rbegin()+10)->sequence());
423 
424  // test digestion metadata ([0]: MKWVT)
425  unit_assert_operator_equal(0, semitrypticPeptides[0].offset());
426  unit_assert_operator_equal(1, semitrypticPeptides[0].missedCleavages());
427  unit_assert_operator_equal(1, semitrypticPeptides[0].specificTermini());
428  unit_assert(semitrypticPeptides[0].NTerminusIsSpecific() &&
429  !semitrypticPeptides[0].CTerminusIsSpecific());
430 
431  peptideItr = semitrypticPeptideSet.find("KWVTFISLLLLFSSAYS"); // clipped methionine
432  unit_assert(peptideItr != semitrypticPeptideSet.end());
433  unit_assert_operator_equal(1, peptideItr->offset());
434  unit_assert_operator_equal(1, peptideItr->missedCleavages());
435  unit_assert_operator_equal(1, peptideItr->specificTermini());
436  unit_assert(peptideItr->NTerminusIsSpecific());
437 
438  peptideItr = semitrypticPeptideSet.find("KWVTFISLLLLFSSAYSR"); // clipped methionine
439  unit_assert(peptideItr != semitrypticPeptideSet.end());
440  unit_assert_operator_equal(1, peptideItr->offset());
441  unit_assert_operator_equal(1, peptideItr->missedCleavages());
442  unit_assert_operator_equal(2, peptideItr->specificTermini());
443  unit_assert(peptideItr->NTerminusIsSpecific() &&
444  peptideItr->CTerminusIsSpecific());
445 
446  peptideItr = semitrypticPeptideSet.find("KWVTFISLLLLFSSAYSRG"); // 2 missed cleavages
447  unit_assert(peptideItr == semitrypticPeptideSet.end());
448 
449  peptideItr = semitrypticPeptideSet.find("WVTFISLLLLFSSAYSR");
450  unit_assert(peptideItr != semitrypticPeptideSet.end());
451  unit_assert_operator_equal(2, peptideItr->offset());
452  unit_assert_operator_equal(0, peptideItr->missedCleavages());
453  unit_assert_operator_equal(2, peptideItr->specificTermini());
454  unit_assert(peptideItr->NTerminusIsSpecific() &&
455  peptideItr->CTerminusIsSpecific());
456 
457  peptideItr = semitrypticPeptideSet.find("WVTFISLLLLFSSAYSRG");
458  unit_assert(peptideItr != semitrypticPeptideSet.end());
459  unit_assert_operator_equal(2, peptideItr->offset());
460  unit_assert_operator_equal(1, peptideItr->missedCleavages());
461  unit_assert_operator_equal(1, peptideItr->specificTermini());
462  unit_assert(peptideItr->NTerminusIsSpecific() &&
463  !peptideItr->CTerminusIsSpecific());
464 
465  peptideItr = semitrypticPeptideSet.find("VTFISLLLLFSSAYSRG"); // non-tryptic
466  unit_assert(peptideItr == semitrypticPeptideSet.end());
467 
468  // test for non-specific peptides
469  unit_assert(semitrypticPeptideSet.count("WVTFISLLLLFSSAYSR")); // tryptic
470  unit_assert(semitrypticPeptideSet.count("KWVTFISLLLLFSSAYSR")); // semi-tryptic
471  unit_assert(semitrypticPeptideSet.count("KWVTFISLLLLFSSAYS")); // clipped methionine & semi-specific
472  unit_assert(!semitrypticPeptideSet.count("VTFISLLLLFSSAYS")); // non-specific
473 
474  // test semi-specific peptides at the C terminus
475  unit_assert(semitrypticPeptideSet.count("FAVEGPKLVVSTQTALA")); // semi-tryptic
476  unit_assert(!semitrypticPeptideSet.count("FAVEGPKLVVSTQTAL")); // non-tryptic
477 }
478 
480 {
481  if (os_) *os_ << "BSA digestion test" << endl;
482 
483  // >P02769|ALBU_BOVIN Serum albumin - Bos taurus (Bovine).
484  Peptide bsa("MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIAFSQYLQQCPF"
485  "DEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCKVASLRETYGDMADCCEKQEP"
486  "ERNECFLSHKDDSPDLPKLKPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLYY"
487  "ANKYNGVFQECCQAEDKGACLLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVA"
488  "RLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE"
489  "CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFLGSFLYEYSRR"
490  "HPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEK"
491  "LGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESERMPCTEDYLSLIL"
492  "NRLCVLHEKTPVSEKVTKCCTESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLP"
493  "DTEKQIKKQTALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV"
494  "STQTALA");
495 
496  // test fully-specific trypsin digest
498  testTrypticBSA(Digestion(bsa, boost::regex("(?<=[KR])"), Digestion::Config(3, 5, 40)));
499 
500  // test semi-specific trypsin digest
502  testSemitrypticBSA(Digestion(bsa, boost::regex("(?<=[KR])"), Digestion::Config(1, 5, 20, Digestion::SemiSpecific)));
503 
504  // test non-specific trypsin digest
506  testNontrypticBSA(Digestion(bsa, boost::regex("(?<=[KR])"), Digestion::Config(1, 5, 20, Digestion::NonSpecific)));
507 
508  // test semi-specific trypsin digest with n-terminal methionine clipping (motif and regex only)
509  testSemitrypticMethionineClippingBSA(Digestion(bsa, boost::regex("(?<=^M)|(?<=[KR])"), Digestion::Config(1, 5, 20, Digestion::SemiSpecific)));
510  testSemitrypticMethionineClippingBSA(Digestion(bsa, boost::regex("(?<=(^M)|([KR]))"), Digestion::Config(1, 5, 20, Digestion::SemiSpecific)));
511 
512  // test funky digestion
513  Digestion funkyDigestion(bsa, boost::regex("(?<=A[DE])(?=[FG])"), Digestion::Config(0, 5, 100000, Digestion::FullySpecific, false));
514  vector<Peptide> funkyPeptides(funkyDigestion.begin(), funkyDigestion.end());
515 
516  unit_assert_operator_equal("MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIAFSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCKVASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGACLLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVARLSQKFPKAE", funkyPeptides[0].sequence());
517  unit_assert_operator_equal("FVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKECCDKPLLEKSHCIAEVEKDAIPENLPPLTAD", funkyPeptides[1].sequence());
518  unit_assert_operator_equal("FAEDKDVCKNYQEAKDAFLGSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCCTESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQTALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVVSTQTALA", funkyPeptides[2].sequence());
519 
520  // test fully specific Asp-N digest (thus testing ambiguous residue disambiguation)
521  Digestion aspnDigestion(bsa, MS_Asp_N, Digestion::Config(0, 5, 100000, Digestion::FullySpecific, false));
522  vector<Peptide> aspnPeptides(aspnDigestion.begin(), aspnDigestion.end());
523  unit_assert_operator_equal("MKWVTFISLLLLFSSAYSRGVFRR", aspnPeptides[0].sequence());
524  unit_assert_operator_equal("DTHKSEIAHRFK", aspnPeptides[1].sequence());
525  unit_assert_operator_equal("DLGEEHFKGLVLIAFSQYLQQCPF", aspnPeptides[2].sequence());
526  unit_assert_operator_equal("DEHVKLV", aspnPeptides[3].sequence());
527  unit_assert_operator_equal("NELTEFAKTCVA", aspnPeptides[4].sequence());
528 
529  // test no cleavage "digestion"
530  Digestion noCleavageDigestion("ELVISLIVESK", MS_no_cleavage);
531  vector<Peptide> noCleavagePeptides(noCleavageDigestion.begin(), noCleavageDigestion.end());
532 
533  unit_assert_operator_equal(1, noCleavagePeptides.size());
534  unit_assert_operator_equal("ELVISLIVESK", noCleavagePeptides[0].sequence());
535 
536  // test unspecific cleavage digestion
537  Digestion unspecificCleavageDigestion("ELVISLK", MS_unspecific_cleavage, Digestion::Config(0, 5, 5, Digestion::FullySpecific, false));
538  vector<Peptide> unspecificCleavagePeptides(unspecificCleavageDigestion.begin(), unspecificCleavageDigestion.end());
539 
540  unit_assert_operator_equal(3, unspecificCleavagePeptides.size());
541  unit_assert_operator_equal("ELVIS", unspecificCleavagePeptides[0].sequence());
542  unit_assert_operator_equal("LVISL", unspecificCleavagePeptides[1].sequence());
543  unit_assert_operator_equal("VISLK", unspecificCleavagePeptides[2].sequence());
544 }
545 
546 
547 void testFind()
548 {
549  Digestion fully("PEPKTIDEKPEPTIDERPEPKTIDEKKKPEPTIDER", MS_Lys_C_P, Digestion::Config(2, 5, 10));
550  Digestion semi("PEPKTIDEKPEPTIDERPEPKTIDEKKKPEPTIDER", MS_Lys_C_P, Digestion::Config(2, 5, 10, Digestion::SemiSpecific));
551  Digestion non("PEPKTIDEKPEPTIDERPEPKTIDEKKKPEPTIDER", MS_Lys_C_P, Digestion::Config(2, 5, 10, Digestion::NonSpecific));
552  Digestion clipped("MPEPKTIDEKPEPTIDERPEPKTIDEKKKPEPTIDER", MS_Lys_C_P, Digestion::Config(2, 5, 10));
553 
554  // test find_all
555  unit_assert(fully.find_all("ABC").empty()); // not in peptide
556  unit_assert(fully.find_all("PEPK").empty()); // too short
557  unit_assert(fully.find_all("PEPKTIDEKK").empty()); // no N-terminal cleavage
558  unit_assert(fully.find_all("PEPTIDERPEPK").empty()); // too long
559  unit_assert(fully.find_all("PEPTIDERPEPTIDEK").empty()); // too long
560  unit_assert(semi.find_all("PEPKTIDEKKK").empty()); // too many missed cleavages
561  unit_assert(semi.find_all("EPKTIDE").empty()); // no specific termini
562  unit_assert(non.find_all("EPKTIDEKKK").empty()); // too many missed cleavages
563 
564  unit_assert(fully.find_all("PEPKTIDEK").size() == 1);
565  unit_assert(testDigestionMetadata(fully.find_all("PEPKTIDEK")[0], "PEPKTIDEK", 0, 1, 2, "", "P"));
566 
567  unit_assert(fully.find_all("TIDEK").size() == 2);
568  unit_assert(testDigestionMetadata(fully.find_all("TIDEK")[0], "TIDEK", 4, 0, 2, "K", "P"));
569  unit_assert(testDigestionMetadata(fully.find_all("TIDEK")[1], "TIDEK", 21, 0, 2, "K", "K"));
570 
571  unit_assert(fully.find_all("TIDEKK").size() == 1);
572  unit_assert(testDigestionMetadata(fully.find_all("TIDEKK")[0], "TIDEKK", 21, 1, 2, "K", "K"));
573 
574  unit_assert(fully.find_all("TIDEKKK").size() == 1);
575  unit_assert(testDigestionMetadata(fully.find_all("TIDEKKK")[0], "TIDEKKK", 21, 2, 2, "K", "P"));
576 
577  unit_assert(fully.find_all("PEPTIDER").size() == 1);
578  unit_assert(testDigestionMetadata(fully.find_all("PEPTIDER")[0], "PEPTIDER", 28, 0, 2, "K", ""));
579 
580  unit_assert(semi.find_all("PEPKTIDEKK").size() == 1);
581  unit_assert(testDigestionMetadata(semi.find_all("PEPKTIDEKK")[0], "PEPKTIDEKK", 17, 2, 1, "R", "K"));
582 
583  unit_assert(semi.find_all("EPKTIDEKK").size() == 1);
584  unit_assert(testDigestionMetadata(semi.find_all("EPKTIDEKK")[0], "EPKTIDEKK", 18, 2, 1, "P", "K"));
585 
586  unit_assert(non.find_all("PEPKTIDE").size() == 2);
587  unit_assert(testDigestionMetadata(non.find_all("PEPKTIDE")[0], "PEPKTIDE", 0, 1, 1, "", "K"));
588  unit_assert(testDigestionMetadata(non.find_all("PEPKTIDE")[1], "PEPKTIDE", 17, 1, 0, "R", "K"));
589 
590  unit_assert(fully.find_all("EPKTIDEK").empty()); // N-terminal 'P' is not clipped
591  unit_assert(clipped.find_all("PEPKTIDEK").size() == 1); // N-terminal 'M' is clipped
592  unit_assert(testDigestionMetadata(clipped.find_all("PEPKTIDEK")[0], "PEPKTIDEK", 1, 1, 2, "M", "P"));
593 
594  // test find_first
595  unit_assert_throws(fully.find_first("ABC"), runtime_error); // not in peptide
596  unit_assert_throws(fully.find_first("PEPK"), runtime_error); // too short
597  unit_assert_throws(fully.find_first("PEPKTIDEKK"), runtime_error); // no N-terminal cleavage
598  unit_assert_throws(fully.find_first("PEPTIDERPEPK"), runtime_error); // too long
599  unit_assert_throws(fully.find_first("PEPTIDERPEPTIDEK"), runtime_error); // too long
600  unit_assert_throws(semi.find_first("EPKTIDE"), runtime_error); // no specific termini
601  unit_assert_throws(semi.find_first("PEPKTIDEKKK"), runtime_error); // too many missed cleavages
602  unit_assert_throws(non.find_first("PEPKTIDEKKK"), runtime_error); // too many missed cleavages
603 
604  unit_assert(testDigestionMetadata(fully.find_first("PEPKTIDEK"), "PEPKTIDEK", 0, 1, 2, "", "P"));
605  unit_assert(testDigestionMetadata(fully.find_first("PEPKTIDEK", 4242), "PEPKTIDEK", 0, 1, 2, "", "P"));
606 
607  unit_assert(testDigestionMetadata(fully.find_first("TIDEK"), "TIDEK", 4, 0, 2, "K", "P"));
608  unit_assert(testDigestionMetadata(fully.find_first("TIDEK", 4242), "TIDEK", 4, 0, 2, "K", "P"));
609  unit_assert(testDigestionMetadata(fully.find_first("TIDEK", 15), "TIDEK", 21, 0, 2, "K", "K"));
610  unit_assert(testDigestionMetadata(fully.find_first("TIDEK", 21), "TIDEK", 21, 0, 2, "K", "K"));
611 
612  unit_assert(testDigestionMetadata(fully.find_first("TIDEKK"), "TIDEKK", 21, 1, 2, "K", "K"));
613  unit_assert(testDigestionMetadata(fully.find_first("TIDEKKK"), "TIDEKKK", 21, 2, 2, "K", "P"));
614  unit_assert(testDigestionMetadata(fully.find_first("PEPTIDER"), "PEPTIDER", 28, 0, 2, "K", ""));
615 
616  unit_assert(testDigestionMetadata(semi.find_first("IDEKK"), "IDEKK", 22, 1, 1, "T", "K"));
617  unit_assert(testDigestionMetadata(semi.find_first("IDEKKK"), "IDEKKK", 22, 2, 1, "T", "P"));
618  unit_assert(testDigestionMetadata(semi.find_first("PEPTIDER"), "PEPTIDER", 9, 0, 1, "K", "P"));
619  unit_assert(testDigestionMetadata(semi.find_first("PEPTIDER", 28), "PEPTIDER", 28, 0, 2, "K", ""));
620 
621  unit_assert(testDigestionMetadata(non.find_first("EPTIDE"), "EPTIDE", 10, 0, 0, "P", "R"));
622  unit_assert(testDigestionMetadata(non.find_first("EPTIDE", 29), "EPTIDE", 29, 0, 0, "P", "R"));
623 }
624 
625 
627 {
628  boost::exception_ptr exception;
629 
631  ThreadStatus(const boost::exception_ptr& e) : exception(e) {}
632 };
633 
634 
635 void testThreadSafetyWorker(boost::barrier* testBarrier, ThreadStatus& status)
636 {
637  testBarrier->wait(); // wait until all threads have started
638 
639  try
640  {
643  testFind();
644  }
645  catch (exception& e)
646  {
647  status.exception = boost::copy_exception(runtime_error(e.what()));
648  }
649  catch (...)
650  {
651  status.exception = boost::copy_exception(runtime_error("Unhandled exception in worker thread."));
652  }
653 }
654 
655 void testThreadSafety(const int& testThreadCount)
656 {
657  using boost::thread;
658 
659  boost::barrier testBarrier(testThreadCount);
660  list<pair<boost::shared_ptr<thread>, ThreadStatus> > threads;
661  for (int i=0; i < testThreadCount; ++i)
662  {
663  threads.push_back(make_pair(boost::shared_ptr<thread>(), ThreadStatus()));
664  threads.back().first.reset(new thread(testThreadSafetyWorker, &testBarrier, boost::ref(threads.back().second)));
665  }
666 
667  set<boost::shared_ptr<thread> > finishedThreads;
668  while (finishedThreads.size() < threads.size())
669  BOOST_FOREACH_FIELD((boost::shared_ptr<thread>& t)(ThreadStatus& status), threads)
670  {
671  if (t->timed_join(boost::posix_time::seconds(1)))
672  finishedThreads.insert(t);
673 
674  if (status.exception != NULL) // non-null exception?
675  boost::rethrow_exception(status.exception);
676  }
677 }
678 
679 
680 int main(int argc, char* argv[])
681 {
682  TEST_PROLOG(argc, argv)
683 
684  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
685  if (os_) *os_ << "DigestionTest\n";
686 
687  try
688  {
689  //testThreadSafety(1); // does not test thread-safety of singleton initialization
690  testThreadSafety(2);
691  testThreadSafety(4);
692  //testThreadSafety(8);
693  //testThreadSafety(16); // high thread count fails non-deterministically on MSVC; I haven't been able to find the cause.
694  }
695  catch (exception& e)
696  {
697  TEST_FAILED(e.what())
698  }
699  catch (...)
700  {
701  TEST_FAILED("Caught unknown exception.")
702  }
703 
705 }