ProteoWizard
Configuration_mz5.hpp
Go to the documentation of this file.
1 //
2 // $Id: Configuration_mz5.hpp 3484 2012-04-04 19:55:33Z mwilhelm42 $
3 //
4 //
5 // Original authors: Mathias Wilhelm <mw@wilhelmonline.com>
6 // Marc Kirchner <mail@marc-kirchner.de>
7 //
8 // Copyright 2011 Proteomics Center
9 // Children's Hospital Boston, Boston, MA 02135
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // Unless required by applicable law or agreed to in writing, software
18 // distributed under the License is distributed on an "AS IS" BASIS,
19 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 // See the License for the specific language governing permissions and
21 // limitations under the License.
22 //
23 
24 #ifndef CONFIGURATION_MZ5_HPP_
25 #define CONFIGURATION_MZ5_HPP_
26 
27 #include "Datastructures_mz5.hpp"
28 #include "../MSDataFile.hpp"
29 #include <string>
30 
31 namespace pwiz {
32 namespace msdata {
33 namespace mz5 {
34 
35 /**
36  * Configuration class for mz5 im- and export.
37  * This class is holding several different configuration options, such as dataset names, dataset types and different buffering and chunking values.
38  */
40 {
41 public:
42 
43  static unsigned short MZ5_FILE_MAJOR_VERSION;
44  static unsigned short MZ5_FILE_MINOR_VERSION;
45  static bool PRINT_HDF5_EXCEPTIONS;
46 
47  /**
48  * Enumeration for different load strategies for spectra.
49  */
51  {
52  /**
53  * Initializes all meta information of all spectra at the creation of a spectrum list object.
54  */
56  /**
57  * Initializes all meta information of all spectra at the first getSpectrum() call.
58  */
60  //SLP_PreemptionMode not implemented yet
61  //SLP_OnDemand not implemented yet
62  //SLP_CachedOnDemand not implemented yet
63  };
64 
66  /**
67  * Initialize and keep all chromatograms when initializing the chromatogram list.
68  */
70  /**
71  * Initialize and keep all chromatograms when requesting the first chromatogram.
72  */
74  //CLP_PreemptionMode not implemented yet
75  //CLP_OnDemand not implemented yet
76  //CLP_CachedOnDemand not implemented yet
77  };
78 
79  /**
80  * Enumeration to simplify the use of datasets. These values are used to determine dataset specific parameters, such as chunk size, buffer size, name and type.
81  */
83  {
84  /**
85  * Dataset for the controlled vocabulary sets.
86  */
88  /**
89  * File content dataset.
90  */
92  /**
93  * Dataset containing contact information.
94  */
96  /**
97  * Dataset containing all used controlled vocabulary accessions (prefix, accession, definition).
98  */
100  /**
101  * Dataset containing all controlled vocabulary parameters.
102  */
104  /**
105  * Dataset containing all user parameters.
106  */
108  /**
109  * Dataset containing all referenced parameter groups.
110  */
112  /**
113  * Dataset for parameter groups.
114  */
116  /**
117  * Source file dataset.
118  */
120  /**
121  * Sample dataset.
122  */
124  /**
125  * Software dataset.
126  */
128  /**
129  * Scan setting dataset.
130  */
132  /**
133  * Instrument configuration dataset.
134  */
136  /**
137  * Data processing dataset.
138  */
140  /**
141  * Dataset containing all meta information for all runs.
142  */
144  /**
145  * Dataset containing all meta information of all spectra.
146  */
148  /**
149  * Dataset containing all meta information of all binary data elements for spectra.
150  */
152  /**
153  * Index dataset. kth element points to the end of the kth spectrum in MZ and SIntensity.
154  */
156  /**
157  * Dataset containing all mz values for all spectra.
158  */
160  /**
161  * Dataset containing all intensity values for all spectra.
162  */
164  /**
165  * Dataset containing all meta information of all chromatograms.
166  */
168  /**
169  * Dataset containing all meta information of all binary data elements for chromatograms.
170  */
172  /**
173  * Index dataset. kth element points to the end of the kth chromatogram in Time and CIntensity.
174  */
176  /**
177  * Dataset containing all time values.
178  */
180  /**
181  * Dataset containing all chromatogram intensities.
182  */
184  /**
185  * Dataset containing information about the file and specific dataset configurations.
186  */
188  };
189 
190  /**
191  * Default constructor.
192  * Initializes default parameters.
193  */
195 
196  /**
197  * Copy constructor.
198  */
200 
201  /**
202  * Conversion constructor for WriteConfig objects.
203  * Uses values in config to set up specific options such as compression or precision.
204  * @param config a pwiz config object
205  */
207 
208  /**
209  * Assign operator.
210  * @param rhs right hand side of assign operator.
211  * @return the altered object
212  */
214 
215  /**
216  * Returns dataset name for a requested dataset.
217  * @param v dataset
218  * @return dataset name
219  */
220  const std::string& getNameFor(const MZ5DataSets v);
221 
222  /**
223  * Returns dataset enumeration value for a given string.
224  * Returns out_of_range if string does not exist.
225  * @param name dataset name
226  * @return dataset enumeration value
227  */
228  MZ5DataSets getVariableFor(const std::string& name);
229 
230  /**
231  * Returns dataset type.
232  * @param v dataset
233  * @return mz5 data type reference
234  */
235  const H5::DataType& getDataTypeFor(const MZ5DataSets v);
236 
237  /**
238  * Returns chunk size for a dataset.
239  * @param v dataset
240  * @return chunk size. EMPTY_CHUNK_SIZE if no chunking should be used.
241  */
242  const hsize_t& getChunkSizeFor(const MZ5DataSets v);
243 
244  /**
245  * Returns buffer size for a dataset.
246  * Buffers are mainly used to speed up writing of mz and spectrum intensities.
247  * @param v dataset
248  * @return buffer size. NO_BUFFER_SIZE if no buffer will be used.
249  */
250  const size_t& getBufferSizeFor(const MZ5DataSets v);
251 
252  /**
253  * Returns mz5 cache in Mb.
254  * The mz5 cache is used for chunked datasets. This affects the random read time.
255  * @return mz5 cache size
256  */
257  const size_t& getBufferInMb();
258 
259  /**
260  * Returns mz5 cache in byte.
261  * See getBufferInMb()
262  * @return mz5 cache in byte
263  */
264  const size_t getBufferInB();
265 
266  /**
267  * Returns number of used rdcc slots.
268  * This is currently constant 41957L, but should be the next prime after 10-100 times the number of chunks fitting into the cache.
269  * @return number of rdcc slots
270  */
271  const size_t& getRdccSlots();
272 
273  /**
274  * Getter for spectrum load policy.
275  * @return spectrum load policy
276  */
278 
279  /**
280  * Getter for chromatogram load policy.
281  * @return chromatogram load policy
282  */
284 
285  /**
286  * Getter for translation flag.
287  * If this flag is set, mz values of mass spectra are saved as delta mz's. This greatly improves compression rate and significantly reduces file size.
288  * @return true if translating is enabled, otherwise false.
289  */
290  const bool doTranslating() const;
291 
292  /**
293  * Setter for translation flag.
294  * @param flag true if translation of mz and intensity values is to be enabled.
295  */
296  void setTranslating(const bool flag) const;
297 
298  /**
299  * Getter for compression level. Default is 1 if compression is enabled, since the gain in compression rate of >2 is negligible.
300  * @return value between 0-9(0=no compression, 1=fast compression, 9=high compression)
301  */
302  const int getDeflateLvl();
303 
304  /**
305  * Getter for shuffle flag.
306  * Shuffle greatly increases compression rate.
307  * @return true if shuffle is enabled, otherwise false.
308  */
309  const bool doShuffel();
310 
311  /**
312  * Value for empty chunk size. Should be 0.
313  */
314  static hsize_t EMPTY_CHUNK_SIZE;
315  /**
316  * Default value to use no buffer. Should be 0.
317  */
318  static size_t NO_BUFFER_SIZE;
319 
320 private:
321  /**
322  * Initializes configuration object.
323  */
324  void init(const bool deltamz, const bool translateinten);
325 
326  /**
327  * Internal copy of pwiz configuration object.
328  */
330  /**
331  * Map which holds the translation of datasets to dataset names.
332  */
333  std::map<MZ5DataSets, std::string> variableNames_;
334  /**
335  * Map which holds the translation of dataset names to datasets.
336  */
337  std::map<std::string, MZ5DataSets> variableVariables_;
338  /**
339  * Map which holds the data types for a dataset.
340  */
341  std::map<MZ5DataSets, H5::DataType> variableTypes_;
342  /**
343  * Map which holds the chunk size for a dataset.
344  */
345  std::map<MZ5DataSets, hsize_t> variableChunkSizes_;
346  /**
347  * Map which holds the buffer size for a dataset.
348  */
349  std::map<MZ5DataSets, size_t> variableBufferSizes_;
350  /**
351  * MZ5 cache in MB
352  */
353  size_t bufferInMB_;
354  /**
355  * Number of rdcc slots.
356  */
357  size_t rdccSolts_;
358  /**
359  * Spectrum load policy
360  */
362  /**
363  * Chromatogram load policy
364  */
366  /**
367  * flag for translation.
368  */
369  mutable bool doTranslating_;
370  /**
371  * Compression level. If compression level is > 0, shuffle is enabled by default.
372  */
374 };
375 
376 }
377 }
378 }
379 
380 #endif /* CONFIGURATION_MZ5_HPP_ */