ProteoWizard
MSDataFileTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: MSDataFileTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #include "MSDataFile.hpp"
25 #include "Diff.hpp"
26 #include "IO.hpp"
27 #include "SpectrumListBase.hpp"
28 #include "ChromatogramListBase.hpp"
29 #include "examples.hpp"
33 #include <boost/iostreams/filtering_stream.hpp>
34 #include <boost/iostreams/filter/gzip.hpp>
35 #include <boost/iostreams/device/file_descriptor.hpp>
36 #include <boost/iostreams/copy.hpp>
37 
38 
39 using namespace pwiz::util;
40 using namespace pwiz::cv;
41 using namespace pwiz::data;
42 using namespace pwiz::msdata;
43 
44 
// Diagnostic output stream. It stays null unless main() is started with "-v",
// in which case it points at cout; every use in this file is guarded by a
// null check on os_.
45 ostream* os_ = 0;
46
47
// Common prefix of every temporary file name the tests in this file build.
48 string filenameBase_ = "temp.MSDataFileTest";
49 
50 
52 {
 // Edits `msd` in place: drops the last SourceFile entry and the last
 // Software entry, when those lists are non-empty.
 // NOTE(review): the declaration line ahead of this brace is not part of this
 // listing; the call sites that pass an MSDataFile right after reading it
 // suggest this is hackInMemoryMSData(msd) -- confirm against the real file.
53  // remove metadata ptrs appended on read
54  vector<SourceFilePtr>& sfs = msd.fileDescription.sourceFilePtrs;
55  if (!sfs.empty()) sfs.erase(sfs.end()-1);
56  vector<SoftwarePtr>& sws = msd.softwarePtrs;
57  if (!sws.empty()) sws.erase(sws.end()-1);
58
59  // remove current DataProcessing created on read
 // dynamic_cast yields a null pointer when the list object is not of the
 // *ListBase type, so sl and cl may each be null.
 // NOTE(review): sl and cl are not used in the visible lines; the statements
 // that act on them (presumably the DataProcessing reset described above) fall
 // on lines missing from this listing -- confirm.
60  SpectrumListBase* sl = dynamic_cast<SpectrumListBase*>(msd.run.spectrumListPtr.get());
61  ChromatogramListBase* cl = dynamic_cast<ChromatogramListBase*>(msd.run.chromatogramListPtr.get());
64 }
65 
66 
68  const DiffConfig diffConfig)
69 {
 // Writes `tiny` out and reads it back through four routes, asserting after
 // each one (unit_assert) that a Diff against `tiny` under diffConfig is empty:
 //   1. static MSDataFile::write -> filename1 -> MSDataFile
 //   2. member write of that MSDataFile -> filename2 -> MSDataFile
 //   3. gzip-compressed copy of filename1 -> MSDataFile
 //   4. member write to an ostringstream -> filename3 -> MSDataFile
 // Each re-read object is passed through hackInMemoryMSData() before the
 // comparison. The temporary files are removed by the last four statements,
 // which are reached only if nothing above throws.
 // NOTE(review): the first line of the signature is absent from this listing;
 // writeConfig is presumably an MSDataFile::WriteConfig, as passed by test().
70  if (os_) *os_ << "validateWriteRead()\n " << writeConfig << endl;
71
72  string filename1 = filenameBase_ + ".1";
73  string filename2 = filenameBase_ + ".2";
74  string filename3 = filenameBase_ + ".3";
75
 // Everything that opens the files lives in this inner scope, so those
 // objects are destroyed before the remove() calls at the bottom run.
76  {
77  // create MSData object in memory
 // NOTE(review): the statement that populates `tiny` is not visible here
 // (the pop_back() below implies it holds spectra by then) -- confirm.
78  MSData tiny;
80
81  if (writeConfig.format == MSDataFile::Format_mzXML)
82  {
83  // remove s22 since it is not written to mzXML
84  static_cast<SpectrumListSimple&>(*tiny.run.spectrumListPtr).spectra.pop_back();
85  }
86
87  // write to file #1 (static)
88  MSDataFile::write(tiny, filename1, writeConfig);
89
90  // simulate CLI garbage collect behavior, wherein delayed deletes stress
91  // memory and file handle usage
 // msds keeps all 100 MSDataFile objects alive until this scope closes; the
 // Diff built in each iteration is discarded without being checked.
92  {
93  std::vector< boost::shared_ptr< MSDataFile > > msds;
94  for (int i=0;i<100;i++)
95  {
96  boost::shared_ptr<MSDataFile> msd1(new MSDataFile(filename1));
97  msds.push_back(msd1);
98  hackInMemoryMSData(*msd1);
99  Diff<MSData, DiffConfig> diff(tiny, *msd1, diffConfig);
100  }
101  }
102
103  // read back into an MSDataFile object
104  MSDataFile msd1(filename1);
105  hackInMemoryMSData(msd1);
106
107  // compare
108  Diff<MSData, DiffConfig> diff(tiny, msd1, diffConfig);
109  if (diff && os_) *os_ << diff << endl;
110  unit_assert(!diff);
111
112  // write to file #2 (member)
113  msd1.write(filename2, writeConfig);
114
115  // read back into another MSDataFile object
116  MSDataFile msd2(filename2);
117  hackInMemoryMSData(msd2);
118
119  // compare
 // the same Diff object is invoked again on a new pair
120  diff(tiny, msd2);
121  if (diff && os_) *os_ << diff << endl;
122  unit_assert(!diff);
123
124  // now give the gzip read a workout
 // tinyGZ reads filename1 through a gzip_compressor, so copying it into the
 // sink produces filename1 + ".gz" holding the compressed bytes.
125  bio::filtering_istream tinyGZ(bio::gzip_compressor() | bio::file_descriptor_source(filename1));
126  bio::copy(tinyGZ, bio::file_descriptor_sink(filename1+".gz", ios::out|ios::binary));
127
128  MSDataFile msd3(filename1+".gz");
129  hackInMemoryMSData(msd3);
130
131  // compare
132  diff(tiny, msd3);
133  if (diff && os_) *os_ << diff << endl;
134  unit_assert(!diff);
135
136  // test writing to a stream
 // the serialized text is captured in memory first and then dumped to
 // filename3 so it can be read back like the other files
 // NOTE(review): ofs is opened without ios::binary, unlike the gzip sink above.
137  ostringstream oss;
138  msd1.write(oss, writeConfig);
139  string ossStr = oss.str();
140  ofstream ofs(filename3.c_str());
141  ofs.write(ossStr.c_str(), ossStr.length());
142  ofs.close();
143
144  // read back into another MSDataFile object
145  MSDataFile msd4(filename3);
146  hackInMemoryMSData(msd4);
147
148  // compare
149  diff(tiny, msd4);
150  if (diff && os_) *os_ << diff << endl;
151  unit_assert(!diff);
152  }
153
154  // remove temp files
155  boost::filesystem::remove(filename1);
156  boost::filesystem::remove(filename2);
157  boost::filesystem::remove(filename1 + ".gz");
158  boost::filesystem::remove(filename3);
159 }
160 
161 void test()
162 {
163  MSDataFile::WriteConfig writeConfig;
164  DiffConfig diffConfig;
165 
166  // mzML 64-bit, full diff
167  validateWriteRead(writeConfig, diffConfig);
168 
169  writeConfig.indexed = false;
170  validateWriteRead(writeConfig, diffConfig); // no index
171  writeConfig.indexed = true;
172 
173  // mzML 32-bit, full diff
174  writeConfig.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_32;
175  validateWriteRead(writeConfig, diffConfig);
176 
177  // mzXML 32-bit, diff ignoring metadata and chromatograms
178  writeConfig.format = MSDataFile::Format_mzXML;
179  diffConfig.ignoreMetadata = true;
180  diffConfig.ignoreChromatograms = true;
181  validateWriteRead(writeConfig, diffConfig);
182 
183  // mzXML 64-bit, diff ignoring metadata and chromatograms
184  writeConfig.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_64;
185  validateWriteRead(writeConfig, diffConfig);
186 
187  writeConfig.indexed = false;
188  validateWriteRead(writeConfig, diffConfig); // no index
189  writeConfig.indexed = true;
190 }
191 
192 
// Writes `tiny` to five files whose names start with filenameBase_ (.64.mzML,
// .32.mzML, .txt, .32.mzXML, .64.mzXML). Nothing in this function removes
// them afterwards. The only call site in this file, in main(), is commented out.
// NOTE(review): `config` is not declared in the visible lines, `tiny` is not
// visibly populated, and nothing visible changes the output format between the
// .mzML, .txt and .mzXML writes; those statements are presumably on lines
// missing from this listing -- confirm against the real file.
193 void demo()
194 {
195  MSData tiny;
197
199  MSDataFile::write(tiny, filenameBase_ + ".64.mzML", config);
200
 // switch the binary encoding to 32-bit precision for the next write
201  config.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_32;
202  MSDataFile::write(tiny, filenameBase_ + ".32.mzML", config);
203
205  MSDataFile::write(tiny, filenameBase_ + ".txt", config);
206
 // precision is still Precision_32 here, matching the ".32" in the file name
208  MSDataFile::write(tiny, filenameBase_ + ".32.mzXML", config);
209
 // back to 64-bit precision for the final write
210  config.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_64;
211  MSDataFile::write(tiny, filenameBase_ + ".64.mzXML", config);
212 }
213 
214 
// Byte signature that TestReader::identify() looks for at the start of a
// file: 0x01 0xA1, followed by the letters of "Finnigan", each letter
// trailed by a NUL byte (18 bytes in total, no terminating NUL of its own).
const char rawHeader_[] = {
    '\x01', '\xA1',
    'F', '\0',
    'i', '\0',
    'n', '\0',
    'n', '\0',
    'i', '\0',
    'g', '\0',
    'a', '\0',
    'n', '\0'
};
218 
219 
220 class TestReader : public Reader
221 {
222  public:
223 
224  TestReader() : count(0) {}
225 
226  virtual std::string identify(const std::string& filename, const std::string& head) const
227  {
228  if (filename.size()<=4 || filename.substr(filename.size()-4)!=".RAW")
229  return std::string("");
230 
231  for (size_t i=0; i<sizeof(rawHeader_); i++)
232  if (head[i] != rawHeader_[i])
233  return std::string("");
234 
235  count++;
236  return filename;
237  }
238 
239  virtual void read(const std::string& filename, const std::string& head, MSData& result, int runIndex = 0,
240  const Config& config = Config()) const
241  {
242  count++;
243  }
244 
245  virtual void read(const std::string& filename,
246  const std::string& head,
247  std::vector<MSDataPtr>& results,
248  const Config& config = Config()) const
249  {
250  results.push_back(MSDataPtr(new MSData));
251  read(filename, head, *results.back(), 0, config);
252  }
253 
254  const char *getType() const {return "testReader";} // satisfy inheritance
255 
256  mutable int count;
257 };
258 
259 
261 {
 // Checks that MSDataFile uses a caller-supplied Reader: a file carrying the
 // rawHeader_ signature is created, opened through a TestReader, and the
 // reader's call counter is then inspected.
 // NOTE(review): the declaration line ahead of this brace is not part of this
 // listing; main() calls testReader(), which is presumably this function.
262  // create a file
263  string filename = filenameBase_ + ".RAW";
264  ofstream os(filename.c_str());
 // 18 is the size of rawHeader_ (2 marker bytes + 8 letter/NUL pairs)
265  os.write(rawHeader_, 18);
266  os.close();
267
268  // open the file with our Reader
269  TestReader reader;
270  MSDataFile msd(filename, &reader);
271
272  // verify that our reader got called properly
 // TestReader bumps count once per accepted identify() and once per
 // single-run read(); 2 presumably means one of each -- confirm.
273  unit_assert(reader.count == 2);
274
275  // remove temp file
276  boost::filesystem::remove(filename);
277
278  if (os_) *os_ << endl;
279 }
280 
281 
// Writes `tiny` to a temporary file and opens it twice: once with the
// one-argument MSDataFile constructor, asserting that the last SourceFile has
// no MS_SHA_1 CV param, and once with (filename, 0, true), asserting that it
// does. The temporary file is removed at the end if no assertion throws.
// NOTE(review): several lines are absent from this listing -- `writer` is not
// declared in the visible code and `tiny` is not visibly populated; confirm
// against the real file.
282 void testSHA1()
283 {
284  if (os_) *os_ << "testSHA1()\n";
285
286  // write out a test file
287
288  string filename = filenameBase_ + ".SHA1Test";
289  MSData tiny;
291  MSDataFile::write(tiny, filename);
292
 // inner scope: both MSDataFile objects are destroyed before the file is removed
293  {
294  // read in without SHA-1 calculation
295  MSDataFile msd(filename);
296
297  if (os_)
298  {
299  *os_ << "no SHA-1:\n";
 // NOTE(review): back() is dereferenced here before any visible check
 // that sourceFilePtrs is non-empty.
301  IO::write(writer, *msd.fileDescription.sourceFilePtrs.back());
302  }
303
305  unit_assert(!msd.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
306
307  // read in with SHA-1 calculation
308
 // second argument 0 sits where other code in this file passes a Reader
 // pointer; the third argument `true` presumably switches SHA-1 on, per the
 // comment above -- confirm against the MSDataFile constructor.
309  MSDataFile msd_sha1(filename, 0, true);
310
311  if (os_)
312  {
313  *os_ << "with SHA-1:\n";
315  IO::write(writer, *msd_sha1.fileDescription.sourceFilePtrs.back());
316  }
317
318  unit_assert(!msd_sha1.fileDescription.sourceFilePtrs.empty());
319  unit_assert(msd_sha1.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
320  }
321
322  // clean up
323
324  boost::filesystem::remove(filename);
325  if (os_) *os_ << endl;
326 }
327 
328 
// Test driver: runs test(), testReader() and testSHA1() in that order and
// reports any escaping exception through TEST_FAILED. demo() is deliberately
// left commented out.
// NOTE(review): one line just before the closing brace is absent from this
// listing; TEST_PROLOG / TEST_FAILED are project macros whose expansion is not
// visible here.
329 int main(int argc, char* argv[])
330 {
331  TEST_PROLOG(argc, argv)
332
333  try
334  {
 // "-v" as the first argument sends diagnostic output to cout via os_
335  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
336  test();
337  //demo();
338  testReader();
339  testSHA1();
340  }
341  catch (exception& e)
342  {
343  TEST_FAILED(e.what())
344  }
345  catch (...)
346  {
347  TEST_FAILED("Caught unknown exception.")
348  }
349
351 }
352