otsdaq  v2_00_00
XmlDocument.cc
1 
2 #include "otsdaq-core/XmlUtilities/XmlDocument.h"
3 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
4 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
5 #include "otsdaq-core/MessageFacility/MessageFacility.h"
6 #include "otsdaq-core/Macros/CoutHeaderMacros.h"
7 
8 #include <xercesc/parsers/XercesDOMParser.hpp>
9 #include <stdexcept>
10 #include <xercesc/dom/DOM.hpp>
11 #include <xercesc/dom/DOMDocument.hpp>
12 #include <xercesc/dom/DOMDocumentType.hpp>
13 #include <xercesc/dom/DOMElement.hpp>
14 #include <xercesc/dom/DOMImplementation.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/dom/DOMImplementationLS.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/util/OutOfMemoryException.hpp>
29 #include <xercesc/framework/LocalFileFormatTarget.hpp>
30 
31 #include <iostream>
32 #include <sstream>
33 #include <list>
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38 #include <errno.h>
39 
40 using namespace ots;
41 
42 //==============================================================================
43 XmlDocument::XmlDocument(std::string rootName) :
44  rootTagName_(rootName)
45 {
46  INIT_MF("XmlDocument");
47  //__COUT__ << "in" << std::endl;
48  initDocument();
49  rootElement_ = theDocument_->getDocumentElement();
50  //__COUT__ << "out" << std::endl;
51 }
52 
53 //==============================================================================
54 XmlDocument::XmlDocument(const XmlDocument& doc) :
55  rootTagName_(doc.rootTagName_)
56 {
57  //__COUT__ << "in" << std::endl;
58  *this = doc;
59  //__COUT__ << "out" << std::endl;
60 }
61 
62 //==============================================================================
63 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
64 {
65  //__COUT__ << "in" << std::endl;
66  initDocument();
67  rootElement_ = theDocument_->getDocumentElement();
68  recursiveElementCopy(doc.rootElement_, rootElement_);
69  //__COUT__ << "out" << std::endl;
70  return *this;
71 }
72 
73 //==============================================================================
74 XmlDocument::~XmlDocument(void)
75 {
76  //std::cout << __COUT_HDR_FL__<< "Xml Destructor" << std::endl;
77  terminatePlatform();
78 }
79 
80 //==============================================================================
81 void XmlDocument::initDocument(void)
82 {
83  initPlatform();
84 
85  theImplementation_ = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
86 
87  if(theImplementation_)
88  {
89  try
90  {
91  theDocument_ = theImplementation_->createDocument(
92  CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root element namespace URI.
93  CONVERT_TO_XML(rootTagName_), // root element name
94  0); // theDocument_ type object (DTD).
95  }
96  catch (const xercesc::OutOfMemoryException&)
97  {
98  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
99  }
100  catch (const xercesc::DOMException& e)
101  {
102  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;
103  }
104  catch(const xercesc::XMLException& e)
105  {
106  __COUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
107  }
108  catch (...)
109  {
110  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_" << XERCES_STD_QUALIFIER endl;
111  }
112  }
113  else
114  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported" << XERCES_STD_QUALIFIER endl;
115 }
116 
117 //==============================================================================
118 void XmlDocument::initPlatform(void)
119 {
120  try
121  {
122  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
123  //std::cout << __COUT_HDR_FL__<< "Initialized new theDocument_" << std::endl;
124  }
125  catch( xercesc::XMLException& e )
126  {
127  __COUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
128  }
129 
130 }
131 
132 //==============================================================================
133 void XmlDocument::terminatePlatform(void)
134 {
135  try
136  {
137  //std::cout << __COUT_HDR_FL__<< "Releasing the document" << std::endl;
138  theDocument_->release();
139  //std::cout << __COUT_HDR_FL__<< "document released" << std::endl;
140  }
141  catch (...)
142  {
143  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_" << XERCES_STD_QUALIFIER endl;
144  }
145 
146  try
147  {
148  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
149  }
150  catch( xercesc::XMLException& e )
151  {
152  __COUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
153  //XMLString::release(&message);
154  }
155 }
156 
157 //==============================================================================
158 //addTextElementToParent
159 // add to parent by pointer to parent
160 // returns pointer to element that is added
161 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, xercesc::DOMElement* parent)
162 {
163  if(parent == 0)
164  {
165  __SS__ << "Illegal Null Parent Pointer!" << __E__;
166  throw std::runtime_error(ss.str());
167  //return 0;
168  }
169  xercesc::DOMElement* child;
170  try
171  {
172  child = theDocument_->createElement(CONVERT_TO_XML(childName));
173  }
174  catch (xercesc::DOMException& e)
175  {
176  __COUT__ << "Can't use the name: " << childName << " to create the child element because the exception says: "
177  << XML_TO_CHAR(e.getMessage()) << ". Very likely you have a name that starts with a number and that's not allowed!" << std::endl;
178  }
179  parent->appendChild(child);
180 
181  try
182  {
183  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
184  }
185  catch(...) //sometimes see TranscodingException
186  {
187  __COUT_ERR__ << "Error caught attempting to create a text node for this text: " <<
188  childText << ". Converting instead to 'Illegal text..'" << std::endl;
189  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML("Illegal text content blocked.")));
190  }
191 
192  return child;
193 }
194 
195 //==============================================================================
196 //addTextElementToParent
197 // add to parent by instance number of parent name
198 // returns pointer to element that is added
199 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, std::string parentName, unsigned int parentIndex)
200 {
201  xercesc::DOMNodeList* nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
202 
203  if(parentIndex >= nodeList->getLength())
204  {
205  __COUT__ << "WARNING: Illegal parent index attempted in tags with name: " << parentName << ", index: " << parentIndex << std::endl;
206  return 0; //illegal index attempted
207  }
208 
209  return addTextElementToParent(childName, childText,(xercesc::DOMElement*)(nodeList->item(parentIndex)));
210 }
211 
212 //==============================================================================
213 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy, xercesc::DOMDocument* copy)
214 {
215  recursiveElementCopy(toCopy->getDocumentElement(),copy->getDocumentElement());
216 }
217 
218 //==============================================================================
219 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy, xercesc::DOMElement* copy)
220 {
221  xercesc::DOMNodeList* nodeListToCopy = toCopy->getChildNodes(); //get all children of the list to copy
222  xercesc::DOMNode* iNode;
223  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
224  for(unsigned int i=0; i<nodeListToCopy->getLength(); i++)
225  {
226  iNode = nodeListToCopy->item(i);
227  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
228  copy->appendChild(child);
229  if( iNode->getFirstChild() != 0 && iNode->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
230  {
231  child->appendChild(copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
232  }
233  recursiveElementCopy((xercesc::DOMElement*)(iNode),child);
234  }
235 }
236 
237 //==============================================================================
238 //XmlDocument::addElementToParent
239 // Add field/value element to XML doc at parent
240 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
241 // children to the new element
242 // On Failure, return -1
243 /*
244 unsigned int XmlDocument::addElementToParent(std::string field, std::string value, xercesc::DOMElement *parentEl, bool verbose)
245 {
246  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
247 
248  if(verbose)
249  {
250  //display parent info
251  //std::cout << __COUT_HDR_FL__<< "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field: " << field << " Value: " << value << std::endl;
252  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
253  std::cout << __COUT_HDR_FL__<< "Parent's First Child Node Value: " << XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
254  }
255 
256  //add field/value element
257  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
258  parentEl->appendChild(newEl);
259 
260  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
261  newEl->appendChild(valueStr);
262 
263  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
264  return nodeList->getLength() - 2; //return child index among parent's children, not counting first child text node
265  return nodeList->getLength() - 1; //return child index among parent's children
266 }
267 */
268 //==============================================================================
269 //XmlDocument::addDataElement
270 // Add field/value element to XML doc at parent which is returned from getElementsByTagName(parentName), entry number parentNameIndex
271 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
272 // children to the new element
273 // On Failure, return -1
274 /*
275 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, std::string parentName, unsigned int parentNameIndex)
276 {
277  DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
278 
279  if(parentNameIndex >= nodeList->getLength()) {
280  __COUT__ << "illegal parent index attempted in tags with name: " << parentName << ", index: " << parentNameIndex << std::endl;
281  return -1; //illegal index attempted
282  }
283 
284  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
285 }
286 */
287 //==============================================================================
288 //XmlDocument::addDataElement
289 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated by parentIndexArraySize)
290 // If parentIndexArray = NULL, element is added with <DATA> parent
291 // otherwise, parentIndexArray indicates the parent within the node list for <DATA> where
292 // the element will be added
293 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
294 // children to the new element
295 // On Failure, return -1
296 /*
297 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned int *parentIndexArray, unsigned int parentIndexArraySize)
298 {
299 
300  //__COUT__ << "field: " << field << ", value: " << value << ", parent: " << parentIndexArraySize << std::endl;
301 
302  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
303 
304  if(parentIndexArray) //if there passed an array find parent relative to data element
305  {
306  //std::cout << __COUT_HDR_FL__<< "Using Parent Index Array" << std::endl;
307 
308  DOMNodeList *nodeList;
309 
310  //iterate through nested parents based on parentIndexArray
311  unsigned int tmpi,cntNotTxt;
312  for(unsigned int i=0;i<parentIndexArraySize;++i)
313  {
314  nodeList = parentEl->getChildNodes(); //get all children
315  cntNotTxt = 0;
316 
317  //get cntNotTxt to proper non text node
318  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
319  {
320  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() == DOMNode::TEXT_NODE) continue; //skip text nodes
321 
322  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
323  ++cntNotTxt; //else look for next
324  }
325 
326  //in theory, only first child can be text - ignore text node children
327  //if(parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
328 
329  if(tmpi >= nodeList->getLength()) {
330  __COUT__ << "illegal child index attempted in nested parents: " << parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl;
331  return -1; //illegal child index attempted in nested parents
332  }
333 
334  parentEl = (DOMElement*)(nodeList->item(tmpi));
335  }
336  }
337 
338  return addElementToParent(field,value,parentEl);
339 }
340 */
341 //==============================================================================
342 //XmlDocument::addXmlData
343 // Append <DATA> from xmldoc to this XML doc
344 // On Success, The child index within <DATA> of the first element is returned
345 // On Failure, return -1
346 /*
347 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
348 {
349  //
350 
351  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first appended data element
352 
353  //add all first level child elements of data and recurse on them
354  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children within data
355  for(unsigned int i = 0; i<nodeList->getLength();++i)
356  {
357  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
358  continue;
359 
360  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
361  }
362 
363  return retIndex;
364 }
365 */
366 //==============================================================================
367 //XmlDocument::recursiveAddElementToParent
368 // add currEl and its children tree to parentEl
369 /*
370 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
371 {
372 std::string field, value = "";
373 
374  //char *tmpField =
375  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
376  //field = tmpField;
377  //XMLString::release( &tmpField );
378 
379  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
380  value = escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
381 
382  //insert currEl
383  addElementToParent(field,value,parentEl);
384 
385  //insert rest of currEl tree
386  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
387  for(unsigned int i = 0; i<nodeList->getLength();++i)
388  {
389  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
390  continue;
391 
392  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
393  }
394 }
395 */
396 //==============================================================================
397 //XmlDocument::outputXmlDocument
398 // recurse through XML theDocument_ and std out and output to stream parameter if not null
399 void XmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut)
400 {
401  recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut);
402 }
403 
404 //==============================================================================
405 //XmlDocument::recursiveOutputXmlDocument
406 // recursively printout XML theDocument_ to std out and output stream if not null
407 void XmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out, bool dispStdOut, std::string tabStr)
408 {
409  //open field tag
410  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
411  if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
412 
413  //insert value if text node child
414  if( currEl->getFirstChild() != NULL &&
415  currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
416  {
417  if(dispStdOut) std::cout << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
418  if(out) *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
419  }
420 
421  xercesc::DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
422 
423  //close opening field tag
424  if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
425  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
426  << ">" << " len:" << nodeList->getLength() << std::endl;
427  if(out) *out << ((nodeList->getLength() == 0 ||
428  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
429  << ">" << std::endl;
430 
431  //insert children
432  std::string newTabStr = tabStr + "\t";
433  for(unsigned int i = 0; i<nodeList->getLength();++i)
434  if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE) //ignore text node children
435  recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr);
436 
437  //close tag if children
438  if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
439  {
440  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
441  if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
442  }
443 }
444 
445 //==============================================================================
446 //XmlDocument::getDataElement
447 // returns the value for field found occurance number of times
448 // returns empty std::string "" if field was not found
449 /*
450 std::string XmlDocument::getDataElement (const std::string field, const unsigned int occurance)
451 {
452  unsigned int count = 0;
453  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
454 }
455 */
456 //==============================================================================
457 //XmlDocument::recursiveFindElement
458 // recursively searches and returns the value for field found occurance number of times
459 /*
460 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string field, const unsigned int occurance, unsigned int &count)
461 {
462  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found, done!!
463  {
464  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
465  return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
466  else
467  return ""; //empty value attribute
468  }
469 
470  std::string retStr;
471  //look through children recursively
472  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
473  for(unsigned int i = 0; i<nodeList->getLength();++i)
474  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
475  {
476  retStr = recursiveFindElement ((DOMElement*)(nodeList->item(i)),field,occurance,count);
477  if(retStr != "") return retStr; //found among children already, done
478  //else continue search within children recursively
479  }
480  return ""; //nothing found
481 }
482 */
483 //==============================================================================
484 //XmlDocument::getAllDataElements
485 // returns all of the values found for the field in a vector
486 // if none found vector will have size 0
487 /*
488 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
489 {
490  vector<string> retVec;
491 
492  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
493 
494  return retVec;
495 }
496 */
497 //==============================================================================
498 //XmlDocument::recursiveFindAllElements
499 // recursively searches for field and appends the value of every occurrence found to retVec
500 /*
501 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string field,std::vector<std::string> *retVec)
502 {
503  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
504  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
505  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
506 
507 
508  //look through children recursively
509  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
510  for(unsigned int i = 0; i<nodeList->getLength();++i)
511  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
512  recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
513 }
514 */
515 //==============================================================================
516 //XmlDocument::escapeString
517 // convert quotes to html quote characters &apos; = ' and &quot; = "
518 // remove new line characters
519 // and (if !allowWhiteSpace) remove white space (so that read from file white space artifact removed)
520 //
521 // convert &amp; = &
522 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
523 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
524 {
525  bool doit = false;
526 
527  unsigned int ws = -1;
528  char htmlTmp[6];
529 
530  for(unsigned int i=0; i<inString.length(); i++)
531  if(inString[i] != ' ')
532  {
533  if(doit) std::cout << __COUT_HDR_FL__<< inString[i] << ":" <<
534  (int)inString[i] << ":" << inString << std::endl;
535 
536  //remove new lines and unprintable characters
537  if(inString[i] == '\r' || inString[i] == '\n' ||//remove new line chars
538  inString[i] == '\t' || //remove tabs
539  inString[i] < 32 || //remove un-printable characters (they mess up xml interpretation)
540  (inString[i] > char(126) && inString[i] < char(161))) //this is aggravated by the bug in MFextensions (though Eric says he fixed on 8/24/2016)
541  //Note: greater than 255 should be impossible if by byte (but there are html chracters in 300s and 8000s)
542  {
543  if(//maintain new lines and tabs
544  inString[i] == '\n')
545  {
546  if(allowWhiteSpace)
547  {
548  sprintf(htmlTmp,"&#%3.3d",inString[i]);
549  inString.insert(i,htmlTmp); //insert html str sequence
550  inString.replace(i+5,1,1,';'); // replace special character with ;
551  i+=6; //skip to next char to check
552  --i;
553  }
554  else //translate to ' '
555  inString[i] = ' ';
556  }
557  else if(//maintain new lines and tabs
558  inString[i] == '\t')
559  {
560  if(allowWhiteSpace)
561  {
562  if(0)
563  {
564  //tab = 8 spaces
565  sprintf(htmlTmp,"&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
566  inString.insert(i,htmlTmp); //insert html str sequence
567  inString.replace(i+47,1,1,';'); // replace special character with ;
568  i+=48; //skip to next char to check
569  --i;
570  }
571  else //tab = 0x09
572  {
573 
574  sprintf(htmlTmp,"&#009");
575  inString.insert(i,htmlTmp); //insert html str sequence
576  inString.replace(i+5,1,1,';'); // replace special character with ;
577  i+=6; //skip to next char to check
578  --i;
579  }
580  }
581  else //translate to ' '
582  inString[i] = ' ';
583  }
584  else
585  {
586  inString.erase(i,1); //erase character
587  --i; //step back so next char to check is correct
588  }
589  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
590  continue;
591  }
592 
593  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
594 
595  //replace special characters
596  if(inString[i] == '\"' || inString[i] == '\'')
597  {
598  inString.insert(i,(inString[i] == '\'')?"&apos":"&quot"); //insert HTML name before quotes
599  inString.replace(i+5,1,1,';'); // replace special character with ;
600  i+=5; //skip to next char to check
601  //std::cout << __COUT_HDR_FL__<< inString << std::endl;
602  }
603  else if(inString[i] == '&')
604  {
605  inString.insert(i,"&amp"); //insert HTML name before special character
606  inString.replace(i+4,1,1,';'); // replace special character with ;
607  i+=4; //skip to next char to check
608  }
609  else if(inString[i] == '<' || inString[i] == '>')
610  {
611  inString.insert(i,(inString[i] == '<')?"&lt":"&gt"); //insert HTML name before special character
612  inString.replace(i+3,1,1,';'); // replace special character with ;
613  i+=3; //skip to next char to check
614  }
615  else if(inString[i] >= char(161) && inString[i] <= char(255)) //printable special characters
616  {
617  sprintf(htmlTmp,"&#%3.3d",inString[i]);
618  inString.insert(i,htmlTmp); //insert html number sequence
619  inString.replace(i+5,1,1,';'); // replace special character with ;
620  i+=5; //skip to next char to check
621  }
622 
623  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
624 
625  ws = i; //last non white space char
626  }
627  else if(allowWhiteSpace) //keep white space if allowed
628  {
629  if(i-1 == ws) continue; //dont do anything for first white space
630 
631  //for second white space add 2, and 1 from then
632  if(i-2 == ws)
633  {
634  inString.insert(i,"&#160;"); //insert html space
635  i+=6; //skip to point at space again
636  }
637  inString.insert(i,"&#160"); //insert html space
638  inString.replace(i+5,1,1,';'); // replace special character with ;
639  i+=5; //skip to next char to check
640  ws = i;
641  }
642 
643  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << ws << std::endl;
644 
645  inString.substr(0,ws+1);
646 
647  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << inString << std::endl;
648 
649  if(ws == (unsigned int)-1) return ""; //empty std::string since all white space
650  return inString.substr(0,ws+1); //trim right white space
651 }
652 
653 
654 //==============================================================================
655 //XmlDocument::recursiveRemoveChild
656 // remove child and all of child's sub-tree from parent
657 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement *childEl, xercesc::DOMElement *parentEl)
658 {
659  //release child's children first
660  xercesc::DOMNodeList* nodeList = childEl->getChildNodes(); //get all children within data
661  for(unsigned int i = 0; i<nodeList->getLength(); ++i)
662  recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(nodeList->getLength()-1-i)),childEl);
663 
664  //then release child
665  parentEl->removeChild(childEl);
666  childEl->release();
667 }
668 
669 //==============================================================================
670 //XmlDocument::saveXmlDocument
671 // wrapper for private outputXML
672 // Warning: filePath must be accessible or program will crash!
673 void XmlDocument::saveXmlDocument (std::string filePath)
674 {
675  std::cout << __COUT_HDR_FL__<< "Saving theDocument_ to file: " << filePath << std::endl;
676  //Return the first registered theImplementation_ that has the desired features. In this case, we are after a DOM theImplementation_ that has the LS feature... or Load/Save.
677  //DOMImplementation *theImplementation_ = DOMImplementationRegistry::getDOMImplementation(L"LS");
678  xercesc::DOMImplementation *saveImplementation = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
679 
680  std::cout << __COUT_HDR_FL__<< "XERCES Version: " << _XERCES_VERSION << std::endl;
681 
682 #if _XERCES_VERSION >= 30000
683 
684  //std::cout << __COUT_HDR_FL__<< "making file" << filePath << std::endl;
685  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML theDocument_.
686  xercesc::DOMLSSerializer *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
687 
688  // Make the output more human readable by inserting line feeds.
689  if (serializer->getDomConfig()->canSetParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
690  serializer->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
691 
692  // The end-of-line sequence of characters to be used in the XML being written out.
693  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
694 
695  // Convert the path into Xerces compatible XMLCh*.
696  //XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
697 
698  // Specify the target for the XML output.
699  xercesc::XMLFormatTarget* formatTarget;
700  try
701  {
702  //formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
703  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
704  }
705  catch(...)
706  {
707  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
708  serializer->release();
709  //xercesc::XMLString::release(&tempFilePath);
710 
711  return;
712  }
713 
714  // Create a new empty output destination object.
715  xercesc::DOMLSOutput *output = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
716 
717  // Set the stream to our target.
718  output->setByteStream(formatTarget);
719  // Write the serialized output to the destination.
720  serializer->write(theDocument_, output);
721  serializer->release();
722  //xercesc::XMLString::release(&tempFilePath);
723  delete formatTarget;
724 #else
725 
726  xercesc::DOMWriter *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
727  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
728 
729  /*
730  Choose a location for the serialized output. The 3 options are:
731  1) StdOutFormatTarget (std output stream - good for debugging)
732  2) MemBufFormatTarget (to Memory)
733  3) LocalFileFormatTarget (save to file)
734  (Note: You'll need a different header file for each one)
735  */
736  //XMLFormatTarget* pTarget = new StdOutFormatTarget();
737  // Convert the path into Xerces compatible XMLCh*.
738  XMLCh *tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
739  xercesc::XMLFormatTarget* formatTarget;
740  try
741  {
742  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
743  }
744  catch(...)
745  {
746  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
747  serializer->release();
748  xercesc::XMLString::release(&tempFilePath);
749  return;
750  }
751 
752  // Write the serialized output to the target.
753 
754  serializer->writeNode(formatTarget, *theDocument_);
755  serializer->release();
756  xercesc::XMLString::release(&tempFilePath);
757  delete formatTarget;
758 #endif
759 
760  // Cleanup.
761  //std::cout << __COUT_HDR_FL__<< "delete format target" << std::endl;
762 
763 
764 #if _XERCES_VERSION >= 30000
765 
766  //std::cout << __COUT_HDR_FL__<< "delete output0" << std::endl;
767  output->release();
768  //std::cout << __COUT_HDR_FL__<< "delete output1" << std::endl;
769 
770 #endif
771 }
772 
773 
774 //==============================================================================
775 bool XmlDocument::loadXmlDocument (std::string filePath)
776 {
777  std::cout << __COUT_HDR_FL__<< "Loading theDocument_ from file: " << filePath << std::endl;
778 
779  struct stat fileStatus;
780 
781  if(stat(filePath.c_str(), &fileStatus) != 0)
782  {
783  std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
784  return false;
785  }
786 
787  //reset xml platform and theDocument_
788  terminatePlatform();
789  initPlatform();
790 
791  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
792  // Configure xercesc::DOM parser.
793  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
794  parser->setDoNamespaces ( true );
795  parser->setDoSchema ( true );
796  parser->useCachedGrammarInParse ( false );
797 
798  try
799  {
800  parser->parse( filePath.c_str() );
801 
802  //theDocument_ memory object owned by the parent parser object
803  theDocument_ = parser->adoptDocument();//instead of getDocument() so parser will not free theDocument_ when released
804 
805  // Get the top-level element: Name is "root". No attributes for "root"
806  rootElement_ = theDocument_->getDocumentElement();
807  if( !rootElement_ )
808  throw(std::runtime_error( "empty XML theDocument_" ));
809 
810  }
811  catch( xercesc::XMLException& e )
812  {
813  std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
814  return false;
815  }
816  delete parser;
817 
818  return true;
819 }
820 
821 
822 //==============================================================================
823 //XmlDocument::recursiveFixTextFields
824 // recursively replace the value of every text node below currEl with its escapeString() version
825 /*
826 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
827 {
828  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
829 
830  //recurse through children
831  for(unsigned int i = 0; i<nodeList->getLength();++i)
832  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
833  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML( //change text value to escaped version
834  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
835  else
836  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
837 }
838 */
839