ProteoWizard
Classes | Functions | Variables
Serializer_pepXML_Test.cpp File Reference
#include "Serializer_pepXML.hpp"
#include "Diff.hpp"
#include "References.hpp"
#include "examples.hpp"
#include "pwiz/utility/misc/unit.hpp"
#include "pwiz/utility/misc/Std.hpp"
#include "pwiz/utility/misc/Filesystem.hpp"
#include "pwiz/data/proteome/Digestion.hpp"
#include "TextWriter.hpp"
#include "boost/range/adaptor/transformed.hpp"
#include "boost/range/algorithm/max_element.hpp"
#include "boost/range/algorithm/min_element.hpp"
#include <cstring>

Go to the source code of this file.

Classes

struct  EnzymePtr_specificity
struct  EnzymePtr_missedCleavages

Functions

void stripUnmappedMetadata (IdentData &mzid)
void testTranslation (const string &str)
void testSerializeReally (IdentData &mzid, const Serializer_pepXML::Config &config)
void testSerialize ()
void testPepXMLSpecificity ()
void testStripChargeFromConventionalSpectrumId ()
int main (int argc, char **argv)

Variables

ostream * os_ = 0

Function Documentation

void stripUnmappedMetadata ( IdentData &  mzid)

Definition at line 57 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::IdentData::analysisCollection, pwiz::identdata::DataCollection::analysisData, pwiz::identdata::IdentData::analysisProtocolCollection, pwiz::identdata::IdentData::analysisSampleCollection, pwiz::identdata::IdentData::analysisSoftwareList, pwiz::identdata::IdentData::auditCollection, BFS_STRING, pwiz::identdata::IdentData::bibliographicReference, pwiz::data::ParamContainer::clear(), pwiz::identdata::SpectrumIdentificationProtocol::databaseFilters, pwiz::identdata::SpectrumIdentificationProtocol::databaseTranslation, pwiz::identdata::IdentData::dataCollection, pwiz::identdata::SequenceCollection::dbSequences, pwiz::identdata::PeptideEvidence::end, pwiz::identdata::Enzymes::enzymes, pwiz::identdata::SpectrumIdentificationProtocol::enzymes, pwiz::identdata::DataCollection::inputs, pwiz::identdata::SpectrumIdentificationProtocol::massTable, max(), pwiz::identdata::peptide(), pwiz::identdata::SequenceCollection::peptides, pwiz::identdata::PeptideEvidence::post, pwiz::identdata::PeptideEvidence::pre, pwiz::identdata::AnalysisCollection::proteinDetection, pwiz::identdata::AnalysisData::proteinDetectionListPtr, pwiz::identdata::IdentData::provider, pwiz::identdata::AnalysisSampleCollection::samples, pwiz::identdata::IdentData::sequenceCollection, pwiz::identdata::Inputs::sourceFile, pwiz::identdata::AnalysisCollection::spectrumIdentification, pwiz::identdata::AnalysisData::spectrumIdentificationList, pwiz::identdata::AnalysisProtocolCollection::spectrumIdentificationProtocol, pwiz::identdata::PeptideEvidence::start, and pwiz::identdata::SpectrumIdentificationProtocol::threshold.

Referenced by testSerialize().

{
// Clears or normalizes the parts of `mzid` that, per the inline comments below,
// pepXML does not map, modifying `mzid` in place.
// NOTE(review): `sip` and `peptide` are used below without a visible declaration;
// the lines introducing them (and possibly an enclosing loop over peptides) appear
// to be missing from this listing -- confirm against the full source file.
mzid.bibliographicReference.clear();
mzid.auditCollection.clear();
mzid.provider = Provider();
// Drop the per-software URI, customizations and contact role.
BOOST_FOREACH(AnalysisSoftwarePtr& as, mzid.analysisSoftwareList)
{
as->URI.clear();
as->customizations.clear();
as->contactRolePtr.reset();
}
// pepXML only provides a single min_number_termini and max_num_internal_cleavages for all enzymes
// The smallest transformed specificity and the largest transformed missed-cleavage count
// across all enzymes are computed, then written back to every enzyme.
// NOTE(review): presumably the two functors read Enzyme::terminalSpecificity and
// Enzyme::missedCleavages; their definitions are not shown here.
int minSpecificity = *boost::range::min_element(sip.enzymes.enzymes | boost::adaptors::transformed(EnzymePtr_specificity()));
int maxMissedCleavages = *boost::range::max_element(sip.enzymes.enzymes | boost::adaptors::transformed(EnzymePtr_missedCleavages()));
BOOST_FOREACH(const EnzymePtr& ez, sip.enzymes.enzymes)
{
ez->terminalSpecificity = (proteome::Digestion::Specificity) minSpecificity;
ez->missedCleavages = maxMissedCleavages;
}
// pepXML doesn't map these elements
sip.massTable.clear();
sip.databaseFilters.clear();
sip.databaseTranslation.reset();
// pepXML doesn't map these attributes
mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->name.clear();
mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->version.clear();
mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->releaseDate.clear();
mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->databaseName.clear();
// pepXML doesn't reliably store location or file format
// Both locations are rewritten in place (through the references) to the file name
// with its extension removed.
string& location = mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->location;
location = BFS_STRING(bfs::path(location).replace_extension("").filename());
mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->fileFormat = CVParam();
string& location2 = mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->location;
location2 = BFS_STRING(bfs::path(location2).replace_extension("").filename());
// pepXML doesn't support protein sequences
// The id is rebuilt from the accession with a "DBSeq_" prefix.
BOOST_FOREACH(DBSequencePtr& dbSequence, mzid.sequenceCollection.dbSequences)
{
dbSequence->seq.clear();
dbSequence->length = 0;
dbSequence->id = "DBSeq_" + dbSequence->accession;
}
// pepXML can only support one mass type (we pick the max mass in case one of them is 0)
BOOST_FOREACH(ModificationPtr& mod, peptide->modification)
mod->monoisotopicMassDelta = mod->avgMassDelta = max(mod->monoisotopicMassDelta, mod->avgMassDelta);
// pepXML doesn't support fragment metadata
mzid.dataCollection.analysisData.spectrumIdentificationList[0]->fragmentationTable.clear();
// Visit every identification item of every result in the first identification list.
BOOST_FOREACH(SpectrumIdentificationResultPtr& sir, mzid.dataCollection.analysisData.spectrumIdentificationList[0]->spectrumIdentificationResult)
BOOST_FOREACH(SpectrumIdentificationItemPtr& sii, sir->spectrumIdentificationItem)
{
// pepXML doesn't support fragment metadata or mass tables
sii->fragmentation.clear();
sii->massTablePtr.reset();
for (size_t i=0; i < sii->peptideEvidencePtr.size(); ++i)
{
PeptideEvidence& pe = *sii->peptideEvidencePtr[i];
// pepXML does not store peptide start and end offsets
pe.start = pe.end = 0;
// pepXML's alternative_proteins do not store prev/next AA or missed cleavages
// Only evidence entries after the first one get the '?' placeholder.
if (i > 0)
pe.pre = pe.post = '?';
}
}
// pepXML doesn't have protein assembly
// NOTE(review): no statement follows the comment above in this listing; the code it
// describes appears to be missing from the excerpt -- confirm against the full source.
}
void testTranslation ( const string &  str)

Definition at line 144 of file Serializer_pepXML_Test.cpp.

References unit_assert.

Referenced by testSerializeReally().

{
// Asserts that the serialized pepXML text `str` contains each of the expected
// attribute substrings checked below.
// test that search engine name is written using preferred name
unit_assert(bal::contains(str, "search_engine=\"Mascot\""));
// test that score names are written using preferred name
unit_assert(bal::contains(str, "name=\"ionscore\""));
unit_assert(bal::contains(str, "name=\"homologyscore\""));
unit_assert(bal::contains(str, "name=\"identityscore\""));
unit_assert(bal::contains(str, "name=\"expect\""));
unit_assert(bal::contains(str, "name=\"an extra score\""));
// test that nativeID is preserved
unit_assert(bal::contains(str, "spectrumNativeID=\"controllerType=0 controllerNumber=1 scan=420\""));
}
void testSerializeReally ( IdentData &  mzid,
const Serializer_pepXML::Config &  config 
)

Definition at line 160 of file Serializer_pepXML_Test.cpp.

References diff(), os_, pwiz::identdata::Serializer_pepXML::read(), pwiz::identdata::Serializer_pepXML::Config::readSpectrumQueries, pwiz::identdata::References::resolve(), testTranslation(), unit_assert, and pwiz::identdata::Serializer_pepXML::write().

Referenced by testSerialize().

{
// Writes `mzid` to an in-memory stream with a Serializer_pepXML built from `config`,
// optionally checks the written text, then reads it back into a fresh IdentData.
// Progress and the serialized text are logged only when os_ is non-null.
if (os_) *os_ << "begin testSerialize" << endl;
Serializer_pepXML serializer(config);
ostringstream oss;
serializer.write(oss, mzid, "tiny.pepXML");
if (os_) *os_ << "oss:\n" << oss.str() << endl;
// The text-level checks run only when spectrum queries are enabled in the config.
if (config.readSpectrumQueries)
testTranslation(oss.str());
shared_ptr<istringstream> iss(new istringstream(oss.str()));
IdentData mzid2;
serializer.read(iss, mzid2);
// NOTE(review): `diff` is not declared in this listing; the lines that construct it
// (presumably comparing mzid and mzid2) and any assertion on it appear to be missing
// from the excerpt -- confirm against the full source file.
if (os_ && diff) *os_ << diff << endl;
}
void testSerialize ( )

Definition at line 183 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::IdentData::analysisCollection, pwiz::identdata::DataCollection::analysisData, pwiz::identdata::IdentData::analysisProtocolCollection, pwiz::identdata::IdentData::dataCollection, pwiz::identdata::SequenceCollection::dbSequences, pwiz::identdata::examples::initializeBasicSpectrumIdentification(), MS_Asp_N, MS_Trypsin_P, NonSpecific, pwiz::identdata::SequenceCollection::peptideEvidence, pwiz::identdata::SequenceCollection::peptides, pwiz::identdata::AnalysisData::proteinDetectionListPtr, SemiSpecific, pwiz::identdata::IdentData::sequenceCollection, pwiz::identdata::AnalysisCollection::spectrumIdentification, pwiz::identdata::AnalysisData::spectrumIdentificationList, pwiz::identdata::AnalysisProtocolCollection::spectrumIdentificationProtocol, stripUnmappedMetadata(), and testSerializeReally().

{
// Builds an IdentData with additional enzyme definitions for the serialization test.
// NOTE(review): this listing appears to be missing statements. The functions named in
// the page's References list for this function (initializeBasicSpectrumIdentification,
// stripUnmappedMetadata, testSerializeReally) are not called in the visible lines, and
// several comments below have no code after them -- confirm against the full source.
IdentData mzid;
// test non-specific enzyme
EnzymePtr noEnzyme(new Enzyme);
noEnzyme->id = "ENZ_1";
noEnzyme->cTermGain = "OH";
noEnzyme->nTermGain = "H";
noEnzyme->missedCleavages = 2;
noEnzyme->minDistance = 1;
noEnzyme->terminalSpecificity = proteome::Digestion::NonSpecific;
noEnzyme->siteRegexp = "(?<=[KR])";
noEnzyme->enzymeName.set(MS_Trypsin_P);
mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.push_back(noEnzyme);
// test sense="N" enzymes
EnzymePtr aspN(new Enzyme);
// NOTE(review): same id string as noEnzyme above, as shown in this listing.
aspN->id = "ENZ_1";
aspN->cTermGain = "OH";
aspN->nTermGain = "H";
aspN->missedCleavages = 2;
aspN->minDistance = 1;
aspN->terminalSpecificity = proteome::Digestion::FullySpecific;
aspN->siteRegexp = "(?=[BD])";
aspN->enzymeName.set(MS_Asp_N);
mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.push_back(aspN);
// NOTE(review): the assignments below go through `aspN` after it was pushed above;
// lines between the push and these assignments may be missing from this listing.
aspN->missedCleavages = 4;
aspN->minDistance = 2;
aspN->terminalSpecificity = proteome::Digestion::SemiSpecific;
aspN->siteRegexp = "(?=[BND])";
aspN->enzymeName.clear();
aspN->enzymeName.userParams.push_back(UserParam("custom"));
// test with readSpectrumQueries == false
// clear the original SequenceCollection
// clear the original analysis data
mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->spectrumIDFormat = CVParam();
mzid.analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.reset();
}
void testPepXMLSpecificity ( )

Definition at line 245 of file Serializer_pepXML_Test.cpp.

References pwiz::data::ParamContainer::clear(), pwiz::identdata::cleavageAgent(), pwiz::identdata::PepXMLSpecificity::cut, e(), pwiz::identdata::Enzyme::enzymeName, pwiz::proteome::Digestion::getCleavageAgentRegex(), pwiz::proteome::Digestion::getCleavageAgents(), MS_Asp_N, MS_Trypsin, MS_Trypsin_P, pwiz::identdata::Identifiable::name, pwiz::identdata::PepXMLSpecificity::no_cut, pwiz::identdata::pepXMLSpecificity(), pwiz::identdata::PepXMLSpecificity::sense, pwiz::data::ParamContainer::set(), pwiz::identdata::Enzyme::siteRegexp, unit_assert, unit_assert_operator_equal, and pwiz::data::ParamContainer::userParams.

Referenced by main().

{
// Exercises pepXMLSpecificity() on an Enzyme configured in different ways and checks
// the resulting cut / no_cut strings for several lookaround-style site regexes.
// NOTE(review): `result` and `cleavageAgent` are used without a visible declaration,
// and the repeated calls below have no setup or assertions between them; lines appear
// to be missing from this listing -- confirm against the full source file.
Enzyme ez;
result = pepXMLSpecificity(ez);
result = pepXMLSpecificity(ez);
ez.enzymeName.userParams.push_back(UserParam("trypsin/p"));
result = pepXMLSpecificity(ez);
ez.name = "trypsin/p";
result = pepXMLSpecificity(ez);
ez.name.clear();
result = pepXMLSpecificity(ez);
result = pepXMLSpecificity(ez);
result = pepXMLSpecificity(ez);
result = pepXMLSpecificity(ez);
// REMEMBER: update the pepXMLSpecificity function when new CV enzymes are added
bool allCleavageAgentsHandled = true;
ez.siteRegexp.clear();
// An exception from the lookup is printed to cerr and recorded as a failure through
// the flag instead of propagating.
// NOTE(review): presumably this try block sits inside a loop over cleavage agents
// that is not shown here.
try
{
ez.enzymeName.set(cleavageAgent);
result = pepXMLSpecificity(ez);
}
catch (exception& e)
{
cerr << e.what() << endl;
allCleavageAgentsHandled = false;
}
unit_assert(allCleavageAgentsHandled);
// Lookbehind and lookahead, positive: the listed residues are expected in `cut`.
ez.siteRegexp = "(?<=[QWERTY])(?=[QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("QWERTY", result.cut);
unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.no_cut);
// Negative forms: the expected `cut` is the set of letters other than Q, W, E, R, T, Y
// as spelled out in the literal below.
ez.siteRegexp = "(?<![QWERTY])(?![QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
ez.siteRegexp = "(?<=[QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("QWERTY", result.cut);
ez.siteRegexp = "(?=[QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("QWERTY", result.cut);
ez.siteRegexp = "(?<![QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
ez.siteRegexp = "(?![QWERTY])";
result = pepXMLSpecificity(ez);
unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
}
void testStripChargeFromConventionalSpectrumId ( )

Definition at line 360 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::stripChargeFromConventionalSpectrumId(), and unit_assert_operator_equal.

Referenced by main().

{
// Checks stripChargeFromConventionalSpectrumId() against fixed input/expected pairs.
// Ids with a trailing charge field: the last dot-separated field is expected to be removed.
unit_assert_operator_equal("basename.123.123", stripChargeFromConventionalSpectrumId("basename.123.123.2"));
unit_assert_operator_equal("basename.ext.123.123", stripChargeFromConventionalSpectrumId("basename.ext.123.123.12"));
unit_assert_operator_equal("basename.ext.3.3", stripChargeFromConventionalSpectrumId("basename.ext.3.3.3"));
// Ids without a trailing charge field: expected to come back unchanged.
unit_assert_operator_equal("basename.123.123", stripChargeFromConventionalSpectrumId("basename.123.123"));
unit_assert_operator_equal("basename.ext.123.123", stripChargeFromConventionalSpectrumId("basename.ext.123.123"));
// Id with a "locus:" prefix: only the final field is expected to be stripped.
unit_assert_operator_equal("locus:1.1.1.123", stripChargeFromConventionalSpectrumId("locus:1.1.1.123.2"));
}
int main ( int  argc,
char **  argv 
)

Definition at line 374 of file Serializer_pepXML_Test.cpp.

References e(), os_, TEST_EPILOG, TEST_FAILED, TEST_PROLOG, testPepXMLSpecificity(), testSerialize(), and testStripChargeFromConventionalSpectrumId().

{
// Test entry point: enables verbose output when requested and reports any exception
// through TEST_FAILED.
// NOTE(review): the page's References list for main names testSerialize(),
// testPepXMLSpecificity(), testStripChargeFromConventionalSpectrumId() and TEST_EPILOG,
// none of which appear in the visible lines; they seem to be missing from this listing --
// confirm against the full source file.
TEST_PROLOG(argc, argv)
try
{
// A first argument of exactly "-v" routes the tests' diagnostic output to cout.
if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
}
catch (exception& e)
{
TEST_FAILED(e.what())
}
catch (...)
{
TEST_FAILED("Caught unknown exception.")
}
}

Variable Documentation

ostream* os_ = 0

Definition at line 43 of file Serializer_pepXML_Test.cpp.