otsdaq  v2_04_01
XmlDocument.cc
1 
2 #include "otsdaq-core/XmlUtilities/XmlDocument.h"
3 #include "otsdaq-core/Macros/CoutMacros.h"
4 #include "otsdaq-core/MessageFacility/MessageFacility.h"
5 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
6 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
7 
8 #include <stdexcept>
9 #include <xercesc/dom/DOM.hpp>
10 #include <xercesc/dom/DOMDocument.hpp>
11 #include <xercesc/dom/DOMDocumentType.hpp>
12 #include <xercesc/dom/DOMElement.hpp>
13 #include <xercesc/dom/DOMImplementation.hpp>
14 #include <xercesc/dom/DOMImplementationLS.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/parsers/XercesDOMParser.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/framework/LocalFileFormatTarget.hpp>
29 #include <xercesc/util/OutOfMemoryException.hpp>
30 
31 #include <iostream>
32 #include <list>
33 #include <sstream>
34 
35 #include <errno.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38 #include <unistd.h>
39 
40 using namespace ots;
41 
42 //==============================================================================
43 XmlDocument::XmlDocument(std::string rootName) : rootTagName_(rootName)
44 {
45  // INIT_MF("XmlDocument");
46  //__COUT__ << "in" << std::endl;
47  initDocument();
48  rootElement_ = theDocument_->getDocumentElement();
49  //__COUT__ << "out" << std::endl;
50 }
51 
52 //==============================================================================
53 XmlDocument::XmlDocument(const XmlDocument& doc) : rootTagName_(doc.rootTagName_)
54 {
55  //__COUT__ << "in" << std::endl;
56  *this = doc;
57  //__COUT__ << "out" << std::endl;
58 }
59 
60 //==============================================================================
61 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
62 {
63  //__COUT__ << "in" << std::endl;
64  initDocument();
65  rootElement_ = theDocument_->getDocumentElement();
66  recursiveElementCopy(doc.rootElement_, rootElement_);
67  //__COUT__ << "out" << std::endl;
68  return *this;
69 }
70 
71 //==============================================================================
72 XmlDocument::~XmlDocument(void)
73 {
74  //__COUT__ << "Xml Destructor" << std::endl;
75  terminatePlatform();
76 }
77 
78 //==============================================================================
79 void XmlDocument::initDocument(void)
80 {
81  initPlatform();
82 
83  theImplementation_ =
84  xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
85 
86  if(theImplementation_)
87  {
88  try
89  {
90  theDocument_ = theImplementation_->createDocument(
91  CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root
92  // element
93  // namespace
94  // URI.
95  CONVERT_TO_XML(rootTagName_), // root element name
96  0); // theDocument_ type object (DTD).
97  }
98  catch(const xercesc::OutOfMemoryException&)
99  {
100  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException"
101  << XERCES_STD_QUALIFIER endl;
102  }
103  catch(const xercesc::DOMException& e)
104  {
105  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code
106  << XERCES_STD_QUALIFIER endl;
107  }
108  catch(const xercesc::XMLException& e)
109  {
110  __COUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
111  }
112  catch(...)
113  {
114  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_"
115  << XERCES_STD_QUALIFIER endl;
116  }
117  }
118  else
119  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported"
120  << XERCES_STD_QUALIFIER endl;
121 }
122 
123 //==============================================================================
124 void XmlDocument::initPlatform(void)
125 {
126  try
127  {
128  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
129  //__COUT__ << "Initialized new
130  // theDocument_" << std::endl;
131  }
132  catch(xercesc::XMLException& e)
133  {
134  __COUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage())
135  << std::endl;
136  }
137 }
138 
139 //==============================================================================
140 void XmlDocument::terminatePlatform(void)
141 {
142  try
143  {
144  //__COUT__ << "Releasing the document" << std::endl;
145  theDocument_->release();
146  //__COUT__ << "document released" << std::endl;
147  }
148  catch(...)
149  {
150  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_"
151  << XERCES_STD_QUALIFIER endl;
152  }
153 
154  try
155  {
156  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
157  }
158  catch(xercesc::XMLException& e)
159  {
160  __COUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage())
161  << std::endl;
162  // XMLString::release(&message);
163  }
164 }
165 
166 //==============================================================================
167 // addTextElementToParent
168 // add to parent by pointer to parent
169 // returns pointer to element that is added
170 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName,
171  std::string childText,
172  xercesc::DOMElement* parent)
173 {
174  if(parent == 0)
175  {
176  __SS__ << "Illegal Null Parent Pointer!" << __E__;
177  __SS_THROW__;
178  // return 0;
179  }
180  xercesc::DOMElement* child;
181  try
182  {
183  child = theDocument_->createElement(CONVERT_TO_XML(childName));
184  }
185  catch(xercesc::DOMException& e)
186  {
187  __COUT__ << "Can't use the name: " << childName
188  << " to create the child element because the exception says: "
189  << XML_TO_CHAR(e.getMessage())
190  << ". Very likely you have a name that starts with a number and that's "
191  "not allowed!"
192  << std::endl;
193  }
194  parent->appendChild(child);
195 
196  try
197  {
198  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
199  }
200  catch(...) // sometimes see TranscodingException
201  {
202  __COUT_ERR__ << "Error caught attempting to create a text node for this text: "
203  << childText << ". Converting instead to 'Illegal text..'"
204  << std::endl;
205  child->appendChild(theDocument_->createTextNode(
206  CONVERT_TO_XML("Illegal text content blocked.")));
207  }
208 
209  return child;
210 }
211 
212 //==============================================================================
213 // addTextElementToParent
214 // add to parent by instance number of parent name
215 // returns pointer to element that is added
216 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName,
217  std::string childText,
218  std::string parentName,
219  unsigned int parentIndex)
220 {
221  xercesc::DOMNodeList* nodeList =
222  theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
223 
224  if(parentIndex >= nodeList->getLength())
225  {
226  __COUT__ << "WARNING: Illegal parent index attempted in tags with name: "
227  << parentName << ", index: " << parentIndex << std::endl;
228  return 0; // illegal index attempted
229  }
230 
231  return addTextElementToParent(
232  childName, childText, (xercesc::DOMElement*)(nodeList->item(parentIndex)));
233 }
234 
235 //==============================================================================
236 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy,
237  xercesc::DOMDocument* copy)
238 {
239  recursiveElementCopy(toCopy->getDocumentElement(), copy->getDocumentElement());
240 }
241 
242 //==============================================================================
243 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy,
244  xercesc::DOMElement* copy)
245 {
246  xercesc::DOMNodeList* nodeListToCopy =
247  toCopy->getChildNodes(); // get all children of the list to copy
248  xercesc::DOMNode* iNode;
249  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
250  for(unsigned int i = 0; i < nodeListToCopy->getLength(); i++)
251  {
252  iNode = nodeListToCopy->item(i);
253  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
254  copy->appendChild(child);
255  if(iNode->getFirstChild() != 0 &&
256  iNode->getFirstChild()->getNodeType() ==
257  xercesc::DOMNode::TEXT_NODE) // if has a text node first, insert as value
258  // attribute
259  {
260  child->appendChild(
261  copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
262  }
263  recursiveElementCopy((xercesc::DOMElement*)(iNode), child);
264  }
265 }
266 
267 //==============================================================================
268 // XmlDocument::addElementToParent
269 // Add field/value element to XML doc at parent
270 // On Success, The child index of the added element with respect to the parent is
271 // returned and can be used to add
272 // children to the new element
273 // On Failure, return -1
274 /*
275 unsigned int XmlDocument::addElementToParent(std::string field, std::string value,
276 xercesc::DOMElement *parentEl, bool verbose)
277 {
278  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
279 
280  if(verbose)
281  {
282  //display parent info
283  //__COUT__ << "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field:
284 " << field << " Value: " << value << std::endl; if( parentEl->getFirstChild() != NULL &&
285 parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
286  __COUT__ << "Parent's First Child Node Value: " <<
287 XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
288  }
289 
290  //add field/value element
291  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
292  parentEl->appendChild(newEl);
293 
294  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
295  newEl->appendChild(valueStr);
296 
297  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() ==
298 DOMNode::TEXT_NODE) return nodeList->getLength() - 2; //return child index among parent's
299 children, not counting first child text node return nodeList->getLength() - 1; //return
300 child index among parent's children
301 }
302 */
303 //==============================================================================
304 // XmlDocument::addDataElement
305 // Add field/value element to XML doc at parent which is returned from
306 // getElementsByTagName(parentName), entry number parentNameIndex
307 // On Success, The child index of the added element with respect to the parent is
308 // returned and can be used to add
309 // children to the new element
310 // On Failure, return -1
311 /*
312 unsigned int XmlDocument::addDataElement ( std::string field, std::string value,
313 std::string parentName, unsigned int parentNameIndex)
314 {
315  DOMNodeList *nodeList =
316 theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
317 
318  if(parentNameIndex >= nodeList->getLength()) {
319  __COUT__ << "illegal parent index attempted in tags with name: " << parentName <<
320 ", index: " << parentNameIndex << std::endl; return -1; //illegal index attempted
321  }
322 
323  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
324 }
325 */
326 //==============================================================================
327 // XmlDocument::addDataElement
328 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated
329 // by parentIndexArraySize) If parentIndexArray = NULL, element is added with <DATA>
330 // parent otherwise, parentIndexArray indicates the parent within the node list for
331 //<DATA> where the element will be added
332 // On Success, The child index of the added element with respect to the parent is
333 // returned and can be used to add
334 // children to the new element
335 // On Failure, return -1
336 /*
337 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned
338 int *parentIndexArray, unsigned int parentIndexArraySize)
339 {
340 
341  //__COUT__ << "field: " << field << ", value: " << value << ", parent: " <<
342 parentIndexArraySize << std::endl;
343 
344  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
345 
346  if(parentIndexArray) //if there passed an array find parent relative to data element
347  {
348  //__COUT__ << "Using Parent Index Array" << std::endl;
349 
350  DOMNodeList *nodeList;
351 
352  //iterate through nested parents based on parentIndexArray
353  unsigned int tmpi,cntNotTxt;
354  for(unsigned int i=0;i<parentIndexArraySize;++i)
355  {
356  nodeList = parentEl->getChildNodes(); //get all children
357  cntNotTxt = 0;
358 
359  //get cntNotTxt to proper non text node
360  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
361  {
362  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() ==
363 DOMNode::TEXT_NODE) continue; //skip text nodes
364 
365  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
366  ++cntNotTxt; //else look for next
367  }
368 
369  //in theory, only first child can be text - ignore text node children
370  //if(parentEl->getFirstChild() != NULL &&
371 parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
372 
373  if(tmpi >= nodeList->getLength()) {
374  __COUT__ << "illegal child index attempted in nested parents: " <<
375 parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl; return -1;
376 //illegal child index attempted in nested parents
377  }
378 
379  parentEl = (DOMElement*)(nodeList->item(tmpi));
380  }
381  }
382 
383  return addElementToParent(field,value,parentEl);
384 }
385 */
386 //==============================================================================
387 // XmlDocument::addXmlData
388 // Append <DATA> from xmldoc to this XML doc
389 // On Success, The child index within <DATA> of the first element is returned
390 // On Failure, return -1
391 /*
392 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
393 {
394  //
395 
396  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first
397 appended data element
398 
399  //add all first level child elements of data and recurse on them
400  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children
401 within data for(unsigned int i = 0; i<nodeList->getLength();++i)
402  {
403  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node
404 children continue;
405 
406  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
407  }
408 
409  return retIndex;
410 }
411 */
412 //==============================================================================
413 // XmlDocument::recursiveAddElementToParent
414 // add currEl and its children tree to parentEl
415 /*
416 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
417 {
418 std::string field, value = "";
419 
420  //char *tmpField =
421  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
422  //field = tmpField;
423  //XMLString::release( &tmpField );
424 
425  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
426 DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute value =
427 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
428 
429  //insert currEl
430  addElementToParent(field,value,parentEl);
431 
432  //insert rest of currEl tree
433  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
434  for(unsigned int i = 0; i<nodeList->getLength();++i)
435  {
436  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node
437 children continue;
438 
439  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
440  }
441 }
442 */
443 //==============================================================================
444 // XmlDocument::outputXmlDocument
445 // recurse through XML theDocument_ and std out and output to stream parameter if not
446 // null
447 void XmlDocument::outputXmlDocument(std::ostringstream* out, bool dispStdOut)
448 {
449  recursiveOutputXmlDocument(theDocument_->getDocumentElement(), out, dispStdOut);
450 }
451 
452 //==============================================================================
453 // XmlDocument::recursiveOutputXmlDocument
454 // recursively printout XML theDocument_ to std out and output stream if not null
455 void XmlDocument::recursiveOutputXmlDocument(xercesc::DOMElement* currEl,
456  std::ostringstream* out,
457  bool dispStdOut,
458  std::string tabStr)
459 {
460  // open field tag
461  if(dispStdOut)
462  __COUT__ << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
463  if(out)
464  *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
465 
466  // insert value if text node child
467  if(currEl->getFirstChild() != NULL &&
468  currEl->getFirstChild()->getNodeType() ==
469  xercesc::DOMNode::TEXT_NODE) // if has a text node first, insert as value
470  // attribute
471  {
472  if(dispStdOut)
473  std::cout << " value='"
474  << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
475  if(out)
476  *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()))
477  << "'";
478  }
479 
480  xercesc::DOMNodeList* nodeList = currEl->getChildNodes(); // get all children
481 
482  // close opening field tag
483  if(dispStdOut)
484  std::cout << ((nodeList->getLength() == 0 ||
485  (nodeList->getLength() == 1 &&
486  currEl->getFirstChild()->getNodeType() ==
487  xercesc::DOMNode::TEXT_NODE))
488  ? "/"
489  : "")
490  << ">"
491  << " len:" << nodeList->getLength() << std::endl;
492  if(out)
493  *out << ((nodeList->getLength() == 0 ||
494  (nodeList->getLength() == 1 &&
495  currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))
496  ? "/"
497  : "")
498  << ">" << std::endl;
499 
500  // insert children
501  std::string newTabStr = tabStr + "\t";
502  for(unsigned int i = 0; i < nodeList->getLength(); ++i)
503  if(nodeList->item(i)->getNodeType() !=
504  xercesc::DOMNode::TEXT_NODE) // ignore text node children
505  recursiveOutputXmlDocument(
506  (xercesc::DOMElement*)(nodeList->item(i)), out, dispStdOut, newTabStr);
507 
508  // close tag if children
509  if(nodeList->getLength() > 1 ||
510  (nodeList->getLength() == 1 &&
511  currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
512  {
513  if(dispStdOut)
514  __COUT__ << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">"
515  << std::endl;
516  if(out)
517  *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">"
518  << std::endl;
519  }
520 }
521 
522 //==============================================================================
523 // XmlDocument::getDataElement
524 // returns the value for field found occurance number of times
525 // returns empty std::string "" if field was not found
526 /*
527 std::string XmlDocument::getDataElement (const std::string field, const unsigned int
528 occurance)
529 {
530  unsigned int count = 0;
531  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
532 }
533 */
534 //==============================================================================
535 // XmlDocument::recursiveFindElement
536 // recursively searches and returns the value for field found occurance number of times
537 /*
538 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string
539 field, const unsigned int occurance, unsigned int &count)
540 {
541  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found,
542 done!!
543  {
544  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
545 DOMNode::TEXT_NODE) //if has a text node first, return as value attribute return
546 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())); else return "";
547 //empty value attribute
548  }
549 
550  std::string retStr;
551  //look through children recursively
552  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
553  for(unsigned int i = 0; i<nodeList->getLength();++i)
554  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node
555 children
556  {
557  retStr = recursiveFindElement
558 ((DOMElement*)(nodeList->item(i)),field,occurance,count); if(retStr != "") return retStr;
559 //found among children already, done
560  //else continue search within children recursively
561  }
562  return ""; //nothing found
563 }
564 */
565 //==============================================================================
566 // XmlDocument::getAllDataElements
567 // returns all of the values found for the field in a vector
568 // if none found vector will have size 0
569 /*
570 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
571 {
572  vector<string> retVec;
573 
574  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
575 
576  return retVec;
577 }
578 */
579 //==============================================================================
580 // XmlDocument::recursiveFindAllElements
581 // recursively searches and appends to retVec the value of every element named field
582 /*
583 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string
584 field,std::vector<std::string> *retVec)
585 {
586  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
587  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
588 DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
589  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
590 
591 
592  //look through children recursively
593  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
594  for(unsigned int i = 0; i<nodeList->getLength();++i)
595  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node
596 children recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
597 }
598 */
599 //==============================================================================
600 // XmlDocument::escapeString
601 // convert quotes to html quote characters &apos; = ' and &quot; = "
602 // remove new line characters
603 // and (if !allowWhiteSpace) remove white space (so that read from file white space
604 // artifact removed)
605 //
606 // convert &amp; = &
607 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
608 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
609 {
610  bool doit = false;
611 
612  unsigned int ws = -1;
613  char htmlTmp[6];
614 
615  for(unsigned int i = 0; i < inString.length(); i++)
616  if(inString[i] != ' ')
617  {
618  if(doit)
619  __COUT__ << inString[i] << ":" << (int)inString[i] << ":" << inString
620  << std::endl;
621 
622  // remove new lines and unprintable characters
623  if(inString[i] == '\r' || inString[i] == '\n' || // remove new line chars
624  inString[i] == '\t' || // remove tabs
625  inString[i] < 32 || // remove un-printable characters (they mess up xml
626  // interpretation)
627  (inString[i] > char(126) &&
628  inString[i] < char(161))) // this is aggravated by the bug in
629  // MFextensions (though Eric says he fixed on
630  // 8/24/2016) Note: greater than 255 should be
631  // impossible if by byte (but there are html
632  // chracters in 300s and 8000s)
633  {
634  if( // maintain new lines and tabs
635  inString[i] == '\n')
636  {
637  if(allowWhiteSpace)
638  {
639  sprintf(htmlTmp, "&#%3.3d", inString[i]);
640  inString.insert(i, htmlTmp); // insert html str sequence
641  inString.replace(
642  i + 5, 1, 1, ';'); // replace special character with ;
643  i += 6; // skip to next char to check
644  --i;
645  }
646  else // translate to ' '
647  inString[i] = ' ';
648  }
649  else if( // maintain new lines and tabs
650  inString[i] == '\t')
651  {
652  if(allowWhiteSpace)
653  {
654  if(0)
655  {
656  // tab = 8 spaces
657  sprintf(htmlTmp,
658  "&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
659  inString.insert(i, htmlTmp); // insert html str sequence
660  inString.replace(
661  i + 47, 1, 1, ';'); // replace special character with ;
662  i += 48; // skip to next char to check
663  --i;
664  }
665  else // tab = 0x09
666  {
667  sprintf(htmlTmp, "&#009");
668  inString.insert(i, htmlTmp); // insert html str sequence
669  inString.replace(
670  i + 5, 1, 1, ';'); // replace special character with ;
671  i += 6; // skip to next char to check
672  --i;
673  }
674  }
675  else // translate to ' '
676  inString[i] = ' ';
677  }
678  else
679  {
680  inString.erase(i, 1); // erase character
681  --i; // step back so next char to check is correct
682  }
683  if(doit)
684  __COUT__ << inString << std::endl;
685  continue;
686  }
687 
688  if(doit)
689  __COUT__ << inString << std::endl;
690 
691  // replace special characters
692  if(inString[i] == '\"' || inString[i] == '\'')
693  {
694  inString.insert(i,
695  (inString[i] == '\'')
696  ? "&apos"
697  : "&quot"); // insert HTML name before quotes
698  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
699  i += 5; // skip to next char to check
700  //__COUT__ << inString << std::endl;
701  }
702  else if(inString[i] == '&')
703  {
704  inString.insert(i, "&amp"); // insert HTML name before special character
705  inString.replace(i + 4, 1, 1, ';'); // replace special character with ;
706  i += 4; // skip to next char to check
707  }
708  else if(inString[i] == '<' || inString[i] == '>')
709  {
710  inString.insert(
711  i,
712  (inString[i] == '<')
713  ? "&lt"
714  : "&gt"); // insert HTML name before special character
715  inString.replace(i + 3, 1, 1, ';'); // replace special character with ;
716  i += 3; // skip to next char to check
717  }
718  else if(inString[i] >= char(161) &&
719  inString[i] <= char(255)) // printable special characters
720  {
721  sprintf(htmlTmp, "&#%3.3d", inString[i]);
722  inString.insert(i, htmlTmp); // insert html number sequence
723  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
724  i += 5; // skip to next char to check
725  }
726 
727  if(doit)
728  __COUT__ << inString << std::endl;
729 
730  ws = i; // last non white space char
731  }
732  else if(allowWhiteSpace) // keep white space if allowed
733  {
734  if(i - 1 == ws)
735  continue; // dont do anything for first white space
736 
737  // for second white space add 2, and 1 from then
738  if(0 && i - 2 == ws)
739  {
740  inString.insert(i, "&#160;"); // insert html space
741  i += 6; // skip to point at space again
742  }
743  inString.insert(i, "&#160"); // insert html space
744  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
745  i += 5; // skip to next char to check
746  // ws = i;
747  }
748 
749  if(doit)
750  __COUT__ << inString.size() << " " << ws << std::endl;
751 
752  // inString.substr(0,ws+1);
753 
754  if(doit)
755  __COUT__ << inString.size() << " " << inString << std::endl;
756 
757  if(allowWhiteSpace) // keep all white space
758  return inString;
759  // else trim trailing white space
760 
761  if(ws == (unsigned int)-1)
762  return ""; // empty std::string since all white space
763  return inString.substr(0, ws + 1); // trim right white space
764 }
765 
766 //==============================================================================
767 // XmlDocument::recursiveRemoveChild
768 // remove child and all of child's sub-tree from parent
769 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement* childEl,
770  xercesc::DOMElement* parentEl)
771 {
772  // release child's children first
773  xercesc::DOMNodeList* nodeList =
774  childEl->getChildNodes(); // get all children within data
775  for(unsigned int i = 0; i < nodeList->getLength(); ++i)
776  recursiveRemoveChild(
777  (xercesc::DOMElement*)(nodeList->item(nodeList->getLength() - 1 - i)),
778  childEl);
779 
780  // then release child
781  parentEl->removeChild(childEl);
782  childEl->release();
783 }
784 
785 //==============================================================================
786 // XmlDocument::saveXmlDocument
787 // wrapper for private outputXML
788 // Warning: filePath must be accessible or program will crash!
789 void XmlDocument::saveXmlDocument(std::string filePath)
790 {
791  __COUT__ << "Saving theDocument_ to file: " << filePath << std::endl;
792  // Return the first registered theImplementation_ that has the desired features. In
793  // this case, we are after a DOM theImplementation_ that has the LS feature... or
794  // Load/Save. DOMImplementation *theImplementation_ =
795  // DOMImplementationRegistry::getDOMImplementation(L"LS");
796  xercesc::DOMImplementation* saveImplementation =
797  xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
798 
799  //__COUT__ << "XERCES Version: " << _XERCES_VERSION << std::endl;
800 
801 #if _XERCES_VERSION >= 30000
802 
803  //__COUT__ << "making file" << filePath << std::endl;
804  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML
805  // theDocument_.
806  xercesc::DOMLSSerializer* serializer =
807  ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
808 
809  // Make the output more human readable by inserting line feeds.
810  if(serializer->getDomConfig()->canSetParameter(
811  xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
812  serializer->getDomConfig()->setParameter(
813  xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
814 
815  // The end-of-line sequence of characters to be used in the XML being written out.
816  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
817 
818  // Convert the path into Xerces compatible XMLCh*.
819  // XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
820 
821  // Specify the target for the XML output.
822  xercesc::XMLFormatTarget* formatTarget;
823  try
824  {
825  // formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
826  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
827  }
828  catch(...)
829  {
830  __COUT__ << "Inaccessible file path: " << filePath << std::endl;
831  serializer->release();
832  // xercesc::XMLString::release(&tempFilePath);
833 
834  return;
835  }
836 
837  // Create a new empty output destination object.
838  xercesc::DOMLSOutput* output =
839  ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
840 
841  // Set the stream to our target.
842  output->setByteStream(formatTarget);
843  // Write the serialized output to the destination.
844  serializer->write(theDocument_, output);
845  serializer->release();
846  // xercesc::XMLString::release(&tempFilePath);
847  delete formatTarget;
848 #else
849 
850  xercesc::DOMWriter* serializer =
851  ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
852  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
853 
854  /*
855  Choose a location for the serialized output. The 3 options are:
856  1) StdOutFormatTarget (std output stream - good for debugging)
857  2) MemBufFormatTarget (to Memory)
858  3) LocalFileFormatTarget (save to file)
859  (Note: You'll need a different header file for each one)
860  */
861  // XMLFormatTarget* pTarget = new StdOutFormatTarget();
862  // Convert the path into Xerces compatible XMLCh*.
863  XMLCh* tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
864  xercesc::XMLFormatTarget* formatTarget;
865  try
866  {
867  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
868  }
869  catch(...)
870  {
871  __COUT__ << "Inaccessible file path: " << filePath << std::endl;
872  serializer->release();
873  xercesc::XMLString::release(&tempFilePath);
874  return;
875  }
876 
877  // Write the serialized output to the target.
878 
879  serializer->writeNode(formatTarget, *theDocument_);
880  serializer->release();
881  xercesc::XMLString::release(&tempFilePath);
882  delete formatTarget;
883 #endif
884 
885  // Cleanup.
886  //__COUT__ << "delete format target" << std::endl;
887 
888 #if _XERCES_VERSION >= 30000
889 
890  //__COUT__ << "delete output0" << std::endl;
891  output->release();
892  //__COUT__ << "delete output1" << std::endl;
893 
894 #endif
895 }
896 
897 //==============================================================================
898 bool XmlDocument::loadXmlDocument(std::string filePath)
899 {
900  __COUT__ << "Loading theDocument_ from file: " << filePath << std::endl;
901 
902  struct stat fileStatus;
903 
904  if(stat(filePath.c_str(), &fileStatus) != 0)
905  {
906  __COUT__ << "File not accessible." << std::endl;
907  return false;
908  }
909 
910  // reset xml platform and theDocument_
911  terminatePlatform();
912  initPlatform();
913 
914  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
915  // Configure xercesc::DOM parser.
916  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
917  parser->setDoNamespaces(true);
918  parser->setDoSchema(true);
919  parser->useCachedGrammarInParse(false);
920 
921  try
922  {
923  parser->parse(filePath.c_str());
924 
925  // theDocument_ memory object owned by the parent parser object
926  theDocument_ = parser->adoptDocument(); // instead of getDocument() so parser
927  // will not free theDocument_ when
928  // released
929 
930  // Get the top-level element: Name is "root". No attributes for "root"
931  rootElement_ = theDocument_->getDocumentElement();
932  if(!rootElement_)
933  throw(std::runtime_error("empty XML theDocument_"));
934  }
935  catch(xercesc::XMLException& e)
936  {
937  __COUT__ << "Error parsing file." << std::endl;
938  return false;
939  }
940  delete parser;
941 
942  return true;
943 }
944 
945 //==============================================================================
946 // XmlDocument::recursiveFixTextFields
947 // recursively replace the content of every text node below currEl with its escaped version
948 /*
949 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
950 {
951  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
952 
953  //recurse through children
954  for(unsigned int i = 0; i<nodeList->getLength();++i)
955  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
956  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML(
957 //change text value to escaped version
958  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
959  else
960  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
961 }
962 */