ProteoWizard
SAXParserTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: SAXParserTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
25 #include "SAXParser.hpp"
28 #include <cstring>
29 
30 
31 using namespace pwiz::util;
32 using namespace pwiz::minimxml;
33 using namespace pwiz::minimxml::SAXParser;
34 
35 
// Optional verbose-output stream. Every test checks it for null before writing;
// main() points it at cout when the first command-line argument is "-v".
36 ostream* os_;
37 
// Sample input shared by the tests in this file: a <RootElement> document
// followed by a second top-level element, <AnotherRoot>.
// NOTE(review): handlers further down assert hard-coded stream offsets into this
// text (e.g. 0x27 for <RootElement>, 0x47 for <FirstElement>), so changing its
// content or whitespace shifts those expected positions.
38 // note: this tests single-quoted double quotes
39 const char* sampleXML =
40  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
41  "<RootElement param=\"value\">\n"
42  " <FirstElement escaped_attribute=\"&quot;&lt;&amp;lt;&gt;&quot;\">\n"
43  " Some Text with Entity References: &lt;&amp;&gt;\n"
44  " </FirstElement>\n"
45  " <SecondElement param2=\"something\" param3=\"something.else 1234-56\">\n"
46  " Pre-Text <Inline>Inlined text with <![CDATA[<&\">]]></Inline> Post-text. <br/>\n"
47  " </SecondElement>\n"
48  " <prefix:ThirdElement goober:name=\"value\">\n"
49  " <!--this is a comment-->\n"
50  " <empty_with_space />\n"
51  " </prefix:ThirdElement>\n"
52  " <FifthElement leeloo='>Leeloo > mul-\"tipass'>\n"
53  " You're a monster, Zorg.>I know.\n"
54  " </FifthElement>\n"
55  "</RootElement>\n"
56  "<AnotherRoot>The quick brown fox jumps over the lazy dog.</AnotherRoot>\n";
57 
58 
59 //
60 // demo of event handling
61 //
62 
63 
65 {
66  PrintAttribute(ostream& os) : os_(os) {}
67  ostream& os_;
68 
69  void operator()(const Handler::Attributes::attribute &attr)
70  {
71  os_ << " (" << attr.getName() << "," << attr.getValue() << ")";
72  }
73 };
74 
75 
76 class PrintEventHandler : public Handler
77 {
78  public:
79 
80  PrintEventHandler(ostream& os)
81  : os_(os)
82  {}
83 
84  virtual Status processingInstruction(const string& name,
85  const string& value,
87  {
88  os_ << "[0x" << hex << position << "] processingInstruction: (" << name << "," << value << ")\n";
89  return Status::Ok;
90  };
91 
92  virtual Status startElement(const string& name,
93  const Attributes& attributes,
95  {
96  os_ << "[0x" << hex << position << "] startElement: " << name;
97  for_each(attributes.begin(), attributes.end(), PrintAttribute(os_));
98  os_ << endl;
99  return Status::Ok;
100  };
101 
102  virtual Status endElement(const string& name, stream_offset position)
103  {
104  os_ << "[0x" << hex << position << "] endElement: " << name << endl;
105  return Status::Ok;
106  }
107 
108  virtual Status characters(const SAXParser::saxstring& text, stream_offset position)
109  {
110  os_ << "[0x" << hex << position << "] text: " << text << endl;
111  return Status::Ok;
112  }
113 
114  private:
115  ostream& os_;
116 };
117 
118 
119 void demo()
120 {
121  if (os_)
122  {
123  *os_ << "sampleXML:\n" << sampleXML << endl;
124 
125  istringstream is(sampleXML);
126  PrintEventHandler handler(*os_);
127 
128  *os_ << "first parse events:\n";
129  parse(is, handler);
130  *os_ << endl;
131 
132  *os_ << "second parse events:\n";
133  parse(is, handler);
134  *os_ << endl;
135  }
136 }
137 
138 
139 //
140 // C++ model of the sample XML
141 //
142 
143 
144 struct First
145 {
147  string text;
148 };
149 
150 
// C++ model of <SecondElement> in the sample XML.
struct Second
{
    std::string param2;             // value of the "param2" attribute
    std::string param3;             // value of the "param3" attribute
    std::vector<std::string> text;  // one entry per characters() callback
};
157 
158 
// C++ model of <FifthElement> in the sample XML.
struct Fifth
{
    std::string leeloo;   // value of the "leeloo" attribute
    std::string mr_zorg;  // character data most recently reported for the element
};
164 
165 
166 struct Root
167 {
168  string param;
172 };
173 
174 
175 //
176 //
177 // Handlers to connect XML to C++ model
178 //
179 
180 
181 void readAttribute(const Handler::Attributes& attributes,
182  const string& attributeName,
183  string& result)
184 {
185  Handler::Attributes::attribute_list::const_iterator it = attributes.find(attributeName);
186  if (it != attributes.end())
187  result = it->getValue();
188 }
189 
190 
191 class FirstHandler : public Handler
192 {
193  public:
194 
195  FirstHandler(First& first, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
196  : object_(first)
197  {
198  parseCharacters = true;
199  this->autoUnescapeAttributes = autoUnescapeAttributes;
200  this->autoUnescapeCharacters = autoUnescapeCharacters;
201  }
202 
203  virtual Status startElement(const string& name,
204  const Handler::Attributes& attributes,
206  {
207  if (name == "FirstElement")
208  readAttribute(attributes, "escaped_attribute", object_.escaped_attribute);
209  return Status::Ok;
210  }
211 
212  virtual Status characters(const SAXParser::saxstring& text, stream_offset position)
213  {
214  unit_assert_operator_equal(0x8f, position);
215  object_.text = text.c_str();
216  return Status::Ok;
217  }
218 
219  virtual Status endElement(const string& name, stream_offset position)
220  {
221  unit_assert_operator_equal(0xc3, position);
222  return Status::Ok;
223  }
224 
225  private:
227 };
228 
229 
230 class SecondHandler : public Handler
231 {
232  public:
233 
234  SecondHandler(Second& object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
235  : object_(object)
236  {
237  parseCharacters = true;
238  this->autoUnescapeAttributes = autoUnescapeAttributes;
239  this->autoUnescapeCharacters = autoUnescapeCharacters;
240  }
241 
242  virtual Status startElement(const string& name,
243  const Handler::Attributes& attributes,
245  {
246  if (name == "SecondElement")
247  {
248  readAttribute(attributes, "param2", object_.param2);
249  readAttribute(attributes, "param3", object_.param3);
250  // long as we're here, verify copyability of Handler::Attributes
251  Handler::Attributes *copy1 = new Handler::Attributes(attributes);
252  Handler::Attributes copy2(*copy1);
253  delete copy1;
254  std::string str;
255  readAttribute(copy2, "param2", str);
256  unit_assert(str==object_.param2);
257  }
258 
259  return Status::Ok;
260  }
261 
262  virtual Status characters(const SAXParser::saxstring& text, stream_offset position)
263  {
264  object_.text.push_back(text.c_str());
265  return Status::Ok;
266  }
267 
268  private:
270 };
271 
272 
273 class FifthHandler : public Handler
274 {
275  public:
276 
277  FifthHandler(Fifth& object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
278  : object_(object)
279  {
280  parseCharacters = true;
281  this->autoUnescapeAttributes = autoUnescapeAttributes;
282  this->autoUnescapeCharacters = autoUnescapeCharacters;
283  }
284 
285  virtual Status startElement(const string& name,
286  const Handler::Attributes& attributes,
288  {
289  if (name == "FifthElement")
290  {
291  getAttribute(attributes, "leeloo", object_.leeloo);
292  }
293 
294  return Status::Ok;
295  }
296 
297  virtual Status characters(const SAXParser::saxstring& text, stream_offset position)
298  {
299  object_.mr_zorg = text.c_str();
300  return Status::Ok;
301  }
302 
303  virtual Status endElement(const string& name, stream_offset position)
304  {
305  unit_assert_operator_equal(0x262, position);
306  return Status::Ok;
307  }
308 
309  private:
311 };
312 
313 
314 class RootHandler : public Handler
315 {
316  public:
317 
318  RootHandler(Root& root, bool autoUnescapeAttributes = true, bool autoUnescapeCharacters = true)
319  : object_(root),
320  firstHandler_(object_.first, autoUnescapeAttributes, autoUnescapeCharacters),
321  secondHandler_(object_.second, autoUnescapeAttributes, autoUnescapeCharacters),
322  fifthHandler_(object_.fifth, autoUnescapeAttributes, autoUnescapeCharacters)
323  {
324  parseCharacters = true;
325  this->autoUnescapeAttributes = autoUnescapeAttributes;
326  this->autoUnescapeCharacters = autoUnescapeCharacters;
327  }
328 
329  virtual Status startElement(const string& name,
330  const Attributes& attributes,
332  {
333  if (name == "RootElement")
334  {
335  readAttribute(attributes, "param", object_.param);
336  unit_assert_operator_equal(0x27, position);
337  }
338  else if (name == "FirstElement")
339  {
340  // delegate handling to a FirstHandler
341  unit_assert_operator_equal(0x47, position);
342  return Status(Status::Delegate, &firstHandler_);
343  }
344  else if (name == "SecondElement")
345  {
346  // delegate handling to a SecondHandler
347  return Status(Status::Delegate, &secondHandler_);
348  }
349  else if (name == "FifthElement")
350  {
351  // delegate handling to a FifthHandler
352  return Status(Status::Delegate, &fifthHandler_);
353  }
354 
355  return Status::Ok;
356  }
357 
358  private:
363 };
364 
365 
366 void test()
367 {
368  if (os_) *os_ << "test()\n";
369 
370  istringstream is(sampleXML);
371  Root root;
372  RootHandler rootHandler(root);
373  parse(is, rootHandler);
374 
375  if (os_)
376  {
377  *os_ << "root.param: " << root.param << endl
378  << "first.escaped_attribute: " << root.first.escaped_attribute << endl
379  << "first.text: " << root.first.text << endl
380  << "second.param2: " << root.second.param2 << endl
381  << "second.param3: " << root.second.param3 << endl
382  << "second.text: ";
383  copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
384  *os_ << "\nfifth.leeloo: " << root.fifth.leeloo << endl
385  << "fifth.mr_zorg: " << root.fifth.mr_zorg << endl
386  << "\n";
387  }
388 
389  unit_assert_operator_equal("value", root.param);
391  unit_assert_operator_equal("Some Text with Entity References: <&>", root.first.text);
392  unit_assert_operator_equal("something", root.second.param2);
393  unit_assert_operator_equal("something.else 1234-56", root.second.param3);
394  unit_assert_operator_equal(4, root.second.text.size());
395  unit_assert_operator_equal("Pre-Text", root.second.text[0]);
396  unit_assert_operator_equal("Inlined text with", root.second.text[1]);
397  unit_assert_operator_equal("<&\">", root.second.text[2]);
398  unit_assert_operator_equal("Post-text.", root.second.text[3]);
399  unit_assert_operator_equal(">Leeloo > mul-\"tipass", root.fifth.leeloo);
400  unit_assert_operator_equal("You're a monster, Zorg.>I know.", root.fifth.mr_zorg);
401 }
402 
403 
405 {
406  if (os_) *os_ << "testNoAutoUnescape()\n";
407 
408  istringstream is(sampleXML);
409  Root root;
410  RootHandler rootHandler(root, false, false);
411  parse(is, rootHandler);
412 
413  if (os_)
414  {
415  *os_ << "root.param: " << root.param << endl
416  << "first.escaped_attribute: " << root.first.escaped_attribute << endl
417  << "first.text: " << root.first.text << endl
418  << "second.param2: " << root.second.param2 << endl
419  << "second.param3: " << root.second.param3 << endl
420  << "second.text: ";
421  copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
422  *os_ << "\n\n";
423  }
424 
425  unit_assert_operator_equal("value", root.param);
426  unit_assert_operator_equal("&quot;&lt;&amp;lt;&gt;&quot;", root.first.escaped_attribute);
427  unit_assert_operator_equal("Some Text with Entity References: &lt;&amp;&gt;", root.first.text);
428  unit_assert_operator_equal("something", root.second.param2);
429  unit_assert_operator_equal("something.else 1234-56", root.second.param3);
430  unit_assert_operator_equal(4, root.second.text.size());
431  unit_assert_operator_equal("Pre-Text", root.second.text[0]);
432  unit_assert_operator_equal("Inlined text with", root.second.text[1]);
433  unit_assert_operator_equal("<&\">", root.second.text[2]);
434  unit_assert_operator_equal("Post-text.", root.second.text[3]);
435 }
436 
437 
439 {
440  public:
441 
442  virtual Status startElement(const string& name,
443  const Attributes& attributes,
445  {
446  if (name == "AnotherRoot")
447  {
448  unit_assert_operator_equal(0x281, position);
449  return Status::Done;
450  }
451 
452  return Status::Ok;
453  }
454 };
455 
456 
457 void testDone()
458 {
459  if (os_) *os_ << "testDone()\n";
460 
461  istringstream is(sampleXML);
462  AnotherRootHandler handler;
463  parse(is, handler); // parses <RootElement> ... </RootElement>
464  parse(is, handler); // parses <AnotherRootElement> and aborts
465 
466  string buffer;
467  getline(is, buffer, '<');
468 
469  if (os_) *os_ << "buffer: " << buffer << "\n\n";
470  unit_assert_operator_equal("The quick brown fox jumps over the lazy dog.", buffer);
471 }
472 
473 
475 {
476  if (os_) *os_ << "testBadXML()\n";
477 
478  const char* bad = "<A><B></A></B>";
479  istringstream is(bad);
480  Handler handler;
481 
482  try
483  {
484  parse(is, handler);
485  }
486  catch (exception& e)
487  {
488  if (os_) *os_ << e.what() << "\nOK: Parser caught bad XML.\n\n";
489  return;
490  }
491 
492  throw runtime_error("Parser failed to catch bad XML.");
493 }
494 
495 
497 {
498  int count;
499  NestedHandler() : count(0) {}
500 
501  virtual Status endElement(const string& name, stream_offset position)
502  {
503  count++;
504  return Status::Ok;
505  }
506 };
507 
508 
510 {
511  if (os_) *os_ << "testNested()\n";
512  const char* nested = "<a><a></a></a>";
513  istringstream is(nested);
514 
515  NestedHandler nestedHandler;
516  parse(is, nestedHandler);
517  if (os_) *os_ << "count: " << nestedHandler.count << "\n\n";
518  unit_assert_operator_equal(2, nestedHandler.count);
519 }
520 
521 
523 {
524  if (os_) *os_ << "testRootElement()\n";
525 
526  string RootElement = "RootElement";
528 
529  istringstream sampleXMLStream(sampleXML);
530  unit_assert_operator_equal(RootElement, xml_root_element(sampleXMLStream));
531 
532  {ofstream sampleXMLFile("testRootElement.xml"); sampleXMLFile << sampleXML;}
533  unit_assert_operator_equal(RootElement, xml_root_element_from_file("testRootElement.xml"));
534  bfs::remove("testRootElement.xml");
535 
536  unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement>"));
537  unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement name='value'"));
538 
539  unit_assert_throws(xml_root_element("not-xml"), runtime_error);
540 }
541 
542 
544 {
545  string id1("_x0031_invalid_x0020_ID");
546  unit_assert_operator_equal("1invalid ID", decode_xml_id_copy(id1));
547  unit_assert_operator_equal((void *)&id1, (void *)&decode_xml_id(id1)); // should return reference to id1
548  unit_assert_operator_equal("1invalid ID", id1);
549 
550  string id2("_invalid-ID__x0023_2__x003c_3_x003e_");
551  unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id_copy(id2));
552  unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id(id2));
553 
554  string crazyId("_x0021__x0021__x0021_");
555  unit_assert_operator_equal("!!!", decode_xml_id(crazyId));
556 }
557 
559 {
560  std::string str = " \t foo \n";
561  saxstring xstr = str;
562  unit_assert_operator_equal(xstr,str);
563  unit_assert_operator_equal(xstr,str.c_str());
564  unit_assert_operator_equal(str.length(),xstr.length());
565  xstr.trim_lead_ws();
566  unit_assert_operator_equal(xstr.length(),str.length()-3);
567  unit_assert_operator_equal(xstr,str.substr(3));
568  xstr.trim_trail_ws();
569  unit_assert_operator_equal(xstr.length(),str.length()-5);
570  unit_assert_operator_equal(xstr,str.substr(3,3));
571  unit_assert_operator_equal(xstr[1],'o');
572  xstr[1] = '0';
573  unit_assert_operator_equal(xstr[1],'0');
574  std::string str2(xstr.data());
575  unit_assert_operator_equal(str2,"f0o");
576  std::string str3(xstr.c_str());
577  unit_assert_operator_equal(str2,str3);
578  saxstring xstr2(xstr);
579  unit_assert_operator_equal(xstr2,xstr);
580  saxstring xstr3;
581  unit_assert_operator_equal(xstr3.c_str(),std::string());
582 }
583 
584 int main(int argc, char* argv[])
585 {
586  TEST_PROLOG(argc, argv)
587 
588  try
589  {
590  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
591  demo();
593  test();
595  testDone();
596  testBadXML();
597  testNested();
598  testRootElement();
599  testDecoding();
600  }
601  catch (exception& e)
602  {
603  TEST_FAILED(e.what())
604  }
605  catch (...)
606  {
607  TEST_FAILED("Caught unknown exception.")
608  }
609 
611 }
612